library(ggplot2)
library(knitr)
library(dplyr)
library(org.Mm.eg.db)
library(DT)
library(GeneOverlap)
library(GO.db)
library(KEGGREST)
library(KEGG.db)
library(fgsea)
library(maftools)
library(pheatmap)
library(ensembldb)
library(stringr)
library(ensembldb)
library(BSgenome.Mmusculus.GENCODE.GRCm38.102)
library(biomaRt)
library(ggpubr)
base_dir <- "/media/theron/My_Passport/Ali_data/HTMBmice_RNAseq_Data/MB01JHU504/MB01JHU504_000_analysis/DESeq2/refseq_genes"
PD1_vs_combo <- read.table(sprintf("%s/%s",base_dir,"PD1_vs_Combo_genes_DESeq2.txt"),header=T)
untreated_vs_combo <- read.table(sprintf("%s/%s",base_dir,"Untreated_vs_Combo_genes_DESeq2.txt"),header=T)
untreated_vs_PD1 <- read.table(sprintf("%s/%s",base_dir,"Untreated_vs_PD1_genes_DESeq2.txt"),header=T)
which_sig <- vapply(as.numeric(PD1_vs_combo$PD1_vs_Combo_pvalue),function(val){
if (is.na(val)){
return("Insig")
}
if (val <= 0.05){
return("Sig")
} else {
return("Insig")
}},character(1))
PD1_vs_combo$significance_pval <- which_sig
which_sig <- vapply(PD1_vs_combo$PD1_vs_Combo_padj,function(val){
if (is.na(val)){
return("Insig")
}
if (val <= 0.05){
return("Sig")
} else {
return("Insig")
}},character(1))
PD1_vs_combo$significance_padj <- which_sig
PD1_vs_combo_table <- PD1_vs_combo %>% dplyr::filter(!is.na(PD1_vs_Combo_pvalue) | !is.na(PD1_vs_Combo_padj))
datatable(PD1_vs_combo_table)
ggplot(PD1_vs_combo,aes(x=PD1_vs_Combo_log2FoldChange, y=-log10(PD1_vs_Combo_pvalue), color = significance_pval))+
geom_text(label=PD1_vs_combo$gene_symbol)
ggplot(PD1_vs_combo,aes(x=PD1_vs_Combo_log2FoldChange, y=-log10(PD1_vs_Combo_padj), color = significance_padj))+
geom_text(label=PD1_vs_combo$gene_symbol)
## gsea results on GO and KEGG terms
symbol_to_entrez <- as.list(org.Mm.egALIAS2EG)
PD1_vs_combo$entrez <- symbol_to_entrez[PD1_vs_combo$gene_symbol]
SYMBOLToGO <- mapIds(org.Mm.eg.db,keys=PD1_vs_combo$gene_symbol,column='GO',keytype = 'SYMBOL',multiVals = list)
GOToSYMBOL <- sapply(reverseSplit(SYMBOLToGO),unique)
GOToSYMBOL <- GOToSYMBOL[sapply(GOToSYMBOL,length)>5]
SYMBOLToKEGG <- mapIds(org.Mm.eg.db,keys=PD1_vs_combo$gene_symbol,column='PATH',keytype = 'SYMBOL',multiVals = list)
KEGGToSYMBOL <- sapply(reverseSplit(SYMBOLToKEGG),unique)
KEGGToSYMBOL <- KEGGToSYMBOL[sapply(KEGGToSYMBOL,length)>5]
diff_dat_filt <- PD1_vs_combo %>% dplyr::filter(!is.na(PD1_vs_Combo_log2FoldChange) & !is.na(PD1_vs_Combo_pvalue))
ranks <- diff_dat_filt$PD1_vs_Combo_log2FoldChange*-log10(diff_dat_filt$PD1_vs_Combo_pvalue)
names(ranks)<-diff_dat_filt$gene_symbol
ranks<-ranks[unname(!is.na(ranks))]
# ranks<-rank(ranks,ties.method = "random")
fgseaRes_GO <- fgsea(GOToSYMBOL, ranks, eps=0,scoreType="pos")
fgseaRes_GO<-cbind(sapply(c('ONTOLOGY','TERM','DEFINITION'),
function(x){mapIds(GO.db,keys=fgseaRes_GO$pathway,keytype='GOID',column=x)}),fgseaRes_GO)
fgseaRes_KEGG <- fgsea(KEGGToSYMBOL, ranks, eps=0,scoreType="pos")
fgseaRes_KEGG <- cbind(KEGG.Name=unlist(as.list(KEGGPATHID2NAME)[fgseaRes_KEGG$pathway]),
fgseaRes_KEGG)
pathways<- readRDS("/media/theron/My_Passport/LM_LMT_LUCIANE_cellranger/references/gene_sets/mouse_hallmark.rds")
pathways_list <- lapply(unique(pathways$gs_name),function(pathway){
pathway_small <- pathways %>% dplyr::filter(gs_name == pathway)
return(pathway_small$gene_symbol)
})
names(pathways_list)<-unique(pathways$gs_name)
fgseaRes_HALLMARK <- fgsea(pathways_list, ranks, eps=0)
fgseaRes_HALLMARK <- fgseaRes_HALLMARK %>% dplyr::filter(padj <= 0.05)
GO_table <- fgseaRes_GO %>% dplyr::filter(padj <= 0.05)
GO_table<-GO_table[,c("TERM","DEFINITION","padj","NES","size")]
datatable(GO_table)
KEGG_table <- fgseaRes_KEGG %>% dplyr::filter(padj <= 0.05)
KEGG_table<-KEGG_table[,c("KEGG.Name","padj","NES","size")]
datatable(KEGG_table)
datatable(fgseaRes_HALLMARK[,c(1,3,6,7)])
which_sig <- vapply(as.numeric(untreated_vs_combo$Untreated_vs_Combo_pvalue),function(val){
if (is.na(val)){
return("Insig")
}
if (val <= 0.05){
return("Sig")
} else {
return("Insig")
}},character(1))
untreated_vs_combo$significance_pval <- which_sig
which_sig <- vapply(untreated_vs_combo$Untreated_vs_Combo_padj,function(val){
if (is.na(val)){
return("Insig")
}
if (val <= 0.05){
return("Sig")
} else {
return("Insig")
}},character(1))
untreated_vs_combo$significance_padj <- which_sig
untreated_vs_combo_table <- untreated_vs_combo %>% dplyr::filter(!is.na(Untreated_vs_Combo_pvalue) | !is.na(Untreated_vs_Combo_padj))
datatable(untreated_vs_combo_table)
ggplot(untreated_vs_combo,aes(x=Untreated_vs_Combo_log2FoldChange, y=-log10(Untreated_vs_Combo_pvalue), color = significance_pval))+
geom_text(label=untreated_vs_combo$gene_symbol)
ggplot(untreated_vs_combo,aes(x=Untreated_vs_Combo_log2FoldChange, y=-log10(Untreated_vs_Combo_padj), color = significance_padj))+
geom_text(label=untreated_vs_combo$gene_symbol)
symbol_to_entrez <- as.list(org.Mm.egALIAS2EG)
untreated_vs_combo$entrez <- symbol_to_entrez[untreated_vs_combo$gene_symbol]
SYMBOLToGO <- mapIds(org.Mm.eg.db,keys=untreated_vs_combo$gene_symbol,column='GO',keytype = 'SYMBOL',multiVals = list)
GOToSYMBOL <- sapply(reverseSplit(SYMBOLToGO),unique)
GOToSYMBOL <- GOToSYMBOL[sapply(GOToSYMBOL,length)>5]
SYMBOLToKEGG <- mapIds(org.Mm.eg.db,keys=untreated_vs_combo$gene_symbol,column='PATH',keytype = 'SYMBOL',multiVals = list)
KEGGToSYMBOL <- sapply(reverseSplit(SYMBOLToKEGG),unique)
KEGGToSYMBOL <- KEGGToSYMBOL[sapply(KEGGToSYMBOL,length)>5]
diff_dat_filt <- untreated_vs_combo %>% dplyr::filter(!is.na(Untreated_vs_Combo_log2FoldChange) & !is.na(Untreated_vs_Combo_pvalue))
ranks <- diff_dat_filt$Untreated_vs_Combo_log2FoldChange*-log10(diff_dat_filt$Untreated_vs_Combo_pvalue)
names(ranks)<-diff_dat_filt$gene_symbol
ranks<-ranks[unname(!is.na(ranks))]
# ranks<-rank(ranks,ties.method = "random")
fgseaRes_GO <- fgsea(GOToSYMBOL, ranks, eps=0,scoreType="pos")
fgseaRes_GO<-cbind(sapply(c('ONTOLOGY','TERM','DEFINITION'),
function(x){mapIds(GO.db,keys=fgseaRes_GO$pathway,keytype='GOID',column=x)}),fgseaRes_GO)
fgseaRes_KEGG <- fgsea(KEGGToSYMBOL, ranks, eps=0,scoreType="pos")
fgseaRes_KEGG <- cbind(KEGG.Name=unlist(as.list(KEGGPATHID2NAME)[fgseaRes_KEGG$pathway]),
fgseaRes_KEGG)
pathways_list <- readRDS("/media/theron/My_Passport/LM_LMT_LUCIANE_cellranger/references/gene_sets/pathways_list_ensembl.rds")
fgseaRes_HALLMARK <- fgsea(pathways_list, ranks, eps=0)
fgseaRes_HALLMARK <- fgseaRes_HALLMARK %>% dplyr::filter(padj <= 0.05)
GO_table <- fgseaRes_GO %>% dplyr::filter(padj <= 0.05)
GO_table<-GO_table[,c("TERM","DEFINITION","padj","NES","size")]
datatable(GO_table)
KEGG_table <- fgseaRes_KEGG %>% dplyr::filter(padj <= 0.05)
KEGG_table<-KEGG_table[,c("KEGG.Name","padj","NES","size")]
datatable(KEGG_table)
datatable(fgseaRes_HALLMARK[,c(1,3,6,7)])
which_sig <- vapply(as.numeric(untreated_vs_PD1$Untreated_vs_PD1_pvalue),function(val){
if (is.na(val)){
return("Insig")
}
if (val <= 0.05){
return("Sig")
} else {
return("Insig")
}},character(1))
untreated_vs_PD1$significance_pval <- which_sig
which_sig <- vapply(untreated_vs_PD1$Untreated_vs_PD1_padj,function(val){
if (is.na(val)){
return("Insig")
}
if (val <= 0.05){
return("Sig")
} else {
return("Insig")
}},character(1))
untreated_vs_PD1$significance_padj <- which_sig
untreated_vs_PD1_table <- untreated_vs_PD1 %>% dplyr::filter(!is.na(Untreated_vs_PD1_pvalue) | !is.na(Untreated_vs_PD1_padj))
datatable(untreated_vs_PD1_table)
ggplot(untreated_vs_PD1,aes(x=Untreated_vs_PD1_log2FoldChange, y=-log10(Untreated_vs_PD1_pvalue), color = significance_pval))+
geom_text(label=untreated_vs_PD1$gene_symbol)
ggplot(untreated_vs_PD1,aes(x=Untreated_vs_PD1_log2FoldChange, y=-log10(Untreated_vs_PD1_padj), color = significance_padj))+
geom_text(label=untreated_vs_PD1$gene_symbol)
symbol_to_entrez <- as.list(org.Mm.egALIAS2EG)
untreated_vs_PD1$entrez <- symbol_to_entrez[untreated_vs_PD1$gene_symbol]
SYMBOLToGO <- mapIds(org.Mm.eg.db,keys=untreated_vs_PD1$gene_symbol,column='GO',keytype = 'SYMBOL',multiVals = list)
GOToSYMBOL <- sapply(reverseSplit(SYMBOLToGO),unique)
GOToSYMBOL <- GOToSYMBOL[sapply(GOToSYMBOL,length)>5]
SYMBOLToKEGG <- mapIds(org.Mm.eg.db,keys=untreated_vs_PD1$gene_symbol,column='PATH',keytype = 'SYMBOL',multiVals = list)
KEGGToSYMBOL <- sapply(reverseSplit(SYMBOLToKEGG),unique)
KEGGToSYMBOL <- KEGGToSYMBOL[sapply(KEGGToSYMBOL,length)>5]
diff_dat_filt <- untreated_vs_PD1 %>% dplyr::filter(!is.na(Untreated_vs_PD1_log2FoldChange) & !is.na(Untreated_vs_PD1_pvalue))
ranks <- diff_dat_filt$Untreated_vs_PD1_log2FoldChange*-log10(diff_dat_filt$Untreated_vs_PD1_pvalue)
names(ranks)<-diff_dat_filt$gene_symbol
ranks<-ranks[unname(!is.na(ranks))]
# ranks<-rank(ranks,ties.method = "random")
fgseaRes_GO <- fgsea(GOToSYMBOL, ranks, eps=0,scoreType="pos")
fgseaRes_GO<-cbind(sapply(c('ONTOLOGY','TERM','DEFINITION'),
function(x){mapIds(GO.db,keys=fgseaRes_GO$pathway,keytype='GOID',column=x)}),fgseaRes_GO)
fgseaRes_KEGG <- fgsea(KEGGToSYMBOL, ranks, eps=0,scoreType="pos")
fgseaRes_KEGG <- cbind(KEGG.Name=unlist(as.list(KEGGPATHID2NAME)[fgseaRes_KEGG$pathway]),
fgseaRes_KEGG)
pathways_list <- readRDS("/media/theron/My_Passport/LM_LMT_LUCIANE_cellranger/references/gene_sets/pathways_list_ensembl.rds")
fgseaRes_HALLMARK <- fgsea(pathways_list, ranks, eps=0)
fgseaRes_HALLMARK <- fgseaRes_HALLMARK %>% dplyr::filter(padj <= 0.05)
GO_table <- fgseaRes_GO %>% dplyr::filter(padj <= 0.05)
GO_table<-GO_table[,c("TERM","DEFINITION","padj","NES","size")]
datatable(GO_table)
KEGG_table <- fgseaRes_KEGG %>% dplyr::filter(padj <= 0.05)
KEGG_table<-KEGG_table[,c("KEGG.Name","padj","NES","size")]
datatable(KEGG_table)
datatable(fgseaRes_HALLMARK[,c(1,3,6,7)])
maf_file <- "/media/theron/My_Passport/Ali_data/WES Data for 4T1MIS/MB01JHU503/MB01JHU503_000_analysis/MAF/DNA_4TIMSI_haplotypecaller.maf"
ali_maf = read.maf(maf_file)
## -Reading
## -Validating
## -Silent variants: 817928
## -Summarizing
## -Processing clinical data
## --Missing clinical data
## -Finished in 12.7s elapsed (22.0s cpu)
ali_gene_summ = getGeneSummary(ali_maf)
rownames(ali_gene_summ) <- ali_gene_summ$Hugo_Symbol
rownames(PD1_vs_combo) <- PD1_vs_combo$gene_symbol
rownames(untreated_vs_combo) <- untreated_vs_combo$gene_symbol
rownames(untreated_vs_PD1) <- untreated_vs_PD1$gene_symbol
ali_gene_summ <- cbind(ali_gene_summ,PD1_vs_combo[ali_gene_summ$Hugo_Symbol,c("PD1_vs_Combo_log2FoldChange","significance_pval","significance_padj")])
ali_gene_summ <- cbind(ali_gene_summ,untreated_vs_combo[ali_gene_summ$Hugo_Symbol,c("Untreated_vs_Combo_log2FoldChange","significance_pval","significance_padj")])
ali_gene_summ <- cbind(ali_gene_summ,untreated_vs_PD1[ali_gene_summ$Hugo_Symbol,c("Untreated_vs_PD1_log2FoldChange","significance_pval","significance_padj")])
cols <- colnames(ali_gene_summ)
cols[seq(15,22)]<-c("pc_significance_pval","pc_significance_padj",
"Untreated_vs_Combo_log2FoldChange","uc_significance_pval","uc_significance_padj",
"Untreated_vs_PD1_log2FoldChange","up_significance_pval","up_significance_padj")
colnames(ali_gene_summ) <- cols
datatable(ali_gene_summ)
ali_gene_summ_PD1 <- ali_gene_summ %>% dplyr::filter(pc_significance_pval=="Sig")
ggplot(ali_gene_summ_PD1,aes(y=log2(Missense_Mutation),x=PD1_vs_Combo_log2FoldChange))+
geom_text(label=ali_gene_summ_PD1$Hugo_Symbol)+labs(title="pval Significant Genes")
datatable(ali_gene_summ_PD1)
ali_gene_summ_PD1 <- ali_gene_summ %>% dplyr::filter(pc_significance_padj=="Sig")
ggplot(ali_gene_summ_PD1,aes(y=log2(Missense_Mutation),x=PD1_vs_Combo_log2FoldChange))+
geom_text(label=ali_gene_summ_PD1$Hugo_Symbol)+labs(title="padj Significant Genes")
datatable(ali_gene_summ_PD1)
ali_gene_summ_uc <- ali_gene_summ %>% dplyr::filter(uc_significance_pval=="Sig")
ggplot(ali_gene_summ_uc,aes(y=log2(Missense_Mutation),x=Untreated_vs_Combo_log2FoldChange))+
geom_text(label=ali_gene_summ_uc$Hugo_Symbol)+labs(title="pval Significant Genes")
datatable(ali_gene_summ_uc)
ali_gene_summ_uc <- ali_gene_summ %>% dplyr::filter(uc_significance_padj=="Sig")
ggplot(ali_gene_summ_uc,aes(y=log2(Missense_Mutation),x=Untreated_vs_Combo_log2FoldChange))+
geom_text(label=ali_gene_summ_uc$Hugo_Symbol)+labs(title="padj Significant Genes")
datatable(ali_gene_summ_uc)
ali_gene_summ_up <- ali_gene_summ %>% dplyr::filter(up_significance_pval=="Sig")
ggplot(ali_gene_summ_up,aes(y=log2(Missense_Mutation),x=Untreated_vs_PD1_log2FoldChange))+
geom_text(label=ali_gene_summ_up$Hugo_Symbol)+labs(title="pval Significant Genes")
datatable(ali_gene_summ_up)
ali_gene_summ_up <- ali_gene_summ %>% dplyr::filter(up_significance_padj=="Sig")
ggplot(ali_gene_summ_up,aes(y=log2(Missense_Mutation),x=Untreated_vs_PD1_log2FoldChange))+
geom_text(label=ali_gene_summ_up$Hugo_Symbol)+labs(title="padj Significant Genes")
datatable(ali_gene_summ_up)
tpm_gene_expression_file <- "/media/theron/My_Passport/Ali_data/HTMBmice_RNAseq_Data/MB01JHU504/MB01JHU504_000_analysis/RSEM/tables/genes/genes_tpm_all_samples_norm.txt"
tpm_gene_expression <- read.table(tpm_gene_expression_file,header=T,sep="\t")
maf_file <- "/media/theron/My_Passport/Ali_data/WES Data for 4T1MIS/MB01JHU503/MB01JHU503_000_analysis/MAF/DNA_4TIMSI_haplotypecaller.maf"
ali_maf = read.maf(maf_file)
## -Reading
## -Validating
## -Silent variants: 817928
## -Summarizing
## -Processing clinical data
## --Missing clinical data
## -Finished in 7.081s elapsed (15.7s cpu)
ali_gene_summ = as.data.frame(getGeneSummary(ali_maf))
rownames(ali_gene_summ) <- ali_gene_summ$Hugo_Symbol
missense_dat <- ali_gene_summ[tpm_gene_expression$gene_id,]
missense_dat <- missense_dat[complete.cases(missense_dat),]
rownames(tpm_gene_expression) <- tpm_gene_expression$gene_id
Missense_Mutation<-missense_dat$Missense_Mutation
tpm_gene_expression <- cbind(tpm_gene_expression[rownames(missense_dat),seq(2,ncol(tpm_gene_expression))],Missense_Mutation)
tpm_gene_expression <- tpm_gene_expression[complete.cases(tpm_gene_expression),]
combo<-apply(tpm_gene_expression[,seq(6)],1,mean)
pd1<-apply(tpm_gene_expression[,seq(7,12)],1,mean)
untreated<-apply(tpm_gene_expression[,seq(12,ncol(tpm_gene_expression))],1,mean)
Missense_Mutation<-tpm_gene_expression$Missense_Mutation
average_expression<-data.frame(c(combo,pd1,untreated),
rep(Missense_Mutation,3),
c(rep("combo",length(combo)),
rep("pd1",length(pd1)),
rep("untreated",length(untreated))))
colnames(average_expression)<-c("TPM_expression","Missense_Mutation","type")
average_expression_compared<-data.frame(combo,pd1,untreated,Missense_Mutation)
ggplot(average_expression,aes(x=log2(Missense_Mutation+1),y=log2(TPM_expression+1),color=type))+geom_point(shape=1)
C1_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/C1_pVals.txt"
C1_clusters <- read.table(C1_clusters_file,header=T)
C1_clusters$juncs <- vapply(rownames(C1_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
C1_clusters_sig <- C1_clusters %>% dplyr::filter(C1 <= 0.05)
C2_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/C2_pVals.txt"
C2_clusters <- read.table(C2_clusters_file,header=T)
C2_clusters$juncs <- vapply(rownames(C2_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
C2_clusters_sig <- C2_clusters %>% dplyr::filter(C2 <= 0.05)
C3_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/C3_pVals.txt"
C3_clusters <- read.table(C3_clusters_file,header=T)
C3_clusters$juncs <- vapply(rownames(C3_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
C3_clusters_sig <- C3_clusters %>% dplyr::filter(C3 <= 0.05)
C5_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/C5_pVals.txt"
C5_clusters <- read.table(C5_clusters_file,header=T)
C5_clusters$juncs <- vapply(rownames(C5_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
C5_clusters_sig <- C5_clusters %>% dplyr::filter(C5 <= 0.05)
C6_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/C6_pVals.txt"
C6_clusters <- read.table(C6_clusters_file,header=T)
C6_clusters$juncs <- vapply(rownames(C6_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
C6_clusters_sig <- C6_clusters %>% dplyr::filter(C6 <= 0.05)
C7_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/C7_pVals.txt"
C7_clusters <- read.table(C7_clusters_file,header=T)
C7_clusters_sig <- C7_clusters %>% dplyr::filter(C7 <= 0.05)
C7_clusters_sig$juncs <- vapply(rownames(C7_clusters_sig),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
P1_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/P1_pVals.txt"
P1_clusters <- read.table(P1_clusters_file,header=T)
P1_clusters$juncs <- vapply(rownames(P1_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
P1_clusters_sig <- P1_clusters %>% dplyr::filter(P1 <= 0.05)
P2_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/P2_pVals.txt"
P2_clusters <- read.table(P2_clusters_file,header=T)
P2_clusters$juncs <- vapply(rownames(P2_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
P2_clusters_sig <- P2_clusters %>% dplyr::filter(P2 <= 0.05)
P3_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/P3_pVals.txt"
P3_clusters <- read.table(P3_clusters_file,header=T)
P3_clusters$juncs <- vapply(rownames(P3_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
P3_clusters_sig <- P3_clusters %>% dplyr::filter(P3 <= 0.05)
P5_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/P5_pVals.txt"
P5_clusters <- read.table(P5_clusters_file,header=T)
P5_clusters$juncs <- vapply(rownames(P5_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
P5_clusters_sig <- P5_clusters %>% dplyr::filter(P5 <= 0.05)
P6_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/P6_pVals.txt"
P6_clusters <- read.table(P6_clusters_file,header=T)
P6_clusters$juncs <- vapply(rownames(P6_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
P6_clusters_sig <- P6_clusters %>% dplyr::filter(P6 <= 0.05)
P7_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/P7_pVals.txt"
P7_clusters <- read.table(P7_clusters_file,header=T)
P7_clusters$juncs <- vapply(rownames(P7_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
P7_clusters_sig <- P7_clusters %>% dplyr::filter(P7 <= 0.05)
juncs_sig <- unique(c(C1_clusters_sig$juncs,C2_clusters_sig$juncs,
C3_clusters_sig$juncs,C5_clusters_sig$juncs,
C6_clusters_sig$juncs,C7_clusters_sig$juncs,
P1_clusters_sig$juncs,P2_clusters_sig$juncs,
P3_clusters_sig$juncs,P5_clusters_sig$juncs,
P6_clusters_sig$juncs,P7_clusters_sig$juncs))
juncs_all <- unique(c(C1_clusters$juncs,C2_clusters$juncs,
C3_clusters$juncs,C5_clusters$juncs,
C6_clusters$juncs,C7_clusters$juncs,
P1_clusters$juncs,P2_clusters$juncs,
P3_clusters$juncs,P5_clusters$juncs,
P6_clusters$juncs,P7_clusters$juncs))
all_sample_pvals <- data.frame(juncs_all)
rownames(all_sample_pvals) <- juncs_all
all_sample_pvals$C1 <- NA
all_sample_pvals[C1_clusters$juncs,"C1"]<-C1_clusters$C1
all_sample_pvals$C2 <- NA
all_sample_pvals[C2_clusters$juncs,"C2"]<-C2_clusters$C2
all_sample_pvals$C3 <- NA
all_sample_pvals[C3_clusters$juncs,"C3"]<-C3_clusters$C3
all_sample_pvals$C5 <- NA
all_sample_pvals[C5_clusters$juncs,"C5"]<-C5_clusters$C5
all_sample_pvals$C6 <- NA
all_sample_pvals[C6_clusters$juncs,"C6"]<-C6_clusters$C6
all_sample_pvals$C7 <- NA
all_sample_pvals[C7_clusters$juncs,"C7"]<-C7_clusters$C7
all_sample_pvals$P1 <- NA
all_sample_pvals[P1_clusters$juncs,"P1"]<-P1_clusters$P1
all_sample_pvals$P2 <- NA
all_sample_pvals[P2_clusters$juncs,"P2"]<-P2_clusters$P2
all_sample_pvals$P3 <- NA
all_sample_pvals[P3_clusters$juncs,"P3"]<-P3_clusters$P3
all_sample_pvals$P5 <- NA
all_sample_pvals[P5_clusters$juncs,"P5"]<-P5_clusters$P5
all_sample_pvals$P6 <- NA
all_sample_pvals[P6_clusters$juncs,"P6"]<-P6_clusters$P6
all_sample_pvals$P7 <- NA
all_sample_pvals[P7_clusters$juncs,"P7"]<-P7_clusters$P7
all_sample_pvals<-all_sample_pvals[juncs_sig,seq(2,ncol(all_sample_pvals))]
C1_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/C1_perind.counts.gz"
C1_clusters <- read.table(C1_clusters_file,header=T)
rownames(C1_clusters) <- C1_clusters$chrom
C1_clusters$juncs <- vapply(rownames(C1_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
C1_clusters_psi <- C1_clusters[rownames(C1_clusters_sig),]
C2_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/C2_perind.counts.gz"
C2_clusters <- read.table(C2_clusters_file,header=T)
rownames(C2_clusters) <- C2_clusters$chrom
C2_clusters$juncs <- vapply(rownames(C2_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
C2_clusters_psi <- C2_clusters[rownames(C2_clusters_sig),]
C3_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/C3_perind.counts.gz"
C3_clusters <- read.table(C3_clusters_file,header=T)
rownames(C3_clusters) <- C3_clusters$chrom
C3_clusters$juncs <- vapply(rownames(C3_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
C3_clusters_psi <- C3_clusters[rownames(C3_clusters_sig),]
C5_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/C5_perind.counts.gz"
C5_clusters <- read.table(C5_clusters_file,header=T)
rownames(C5_clusters) <- C5_clusters$chrom
C5_clusters$juncs <- vapply(rownames(C5_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
C5_clusters_psi <- C5_clusters[rownames(C5_clusters_sig),]
C6_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/C6_perind.counts.gz"
C6_clusters <- read.table(C6_clusters_file,header=T)
rownames(C6_clusters) <- C6_clusters$chrom
C6_clusters$juncs <- vapply(rownames(C6_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
C6_clusters_psi <- C6_clusters[rownames(C6_clusters_sig),]
C7_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/C7_perind.counts.gz"
C7_clusters <- read.table(C7_clusters_file,header=T)
rownames(C7_clusters) <- C7_clusters$chrom
C7_clusters$juncs <- vapply(rownames(C7_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
C7_clusters_psi <- C7_clusters[rownames(C7_clusters_sig),]
P1_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/P1_perind.counts.gz"
P1_clusters <- read.table(P1_clusters_file,header=T)
rownames(P1_clusters) <- P1_clusters$chrom
P1_clusters$juncs <- vapply(rownames(P1_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
P1_clusters_psi <- P1_clusters[rownames(P1_clusters_sig),]
P2_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/P2_perind.counts.gz"
P2_clusters <- read.table(P2_clusters_file,header=T)
rownames(P2_clusters) <- P2_clusters$chrom
P2_clusters$juncs <- vapply(rownames(P2_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
P2_clusters_psi <- P2_clusters[rownames(P2_clusters_sig),]
P3_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/P3_perind.counts.gz"
P3_clusters <- read.table(P3_clusters_file,header=T)
rownames(P3_clusters) <- P3_clusters$chrom
P3_clusters$juncs <- vapply(rownames(P3_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
P3_clusters_psi <- P3_clusters[rownames(P3_clusters_sig),]
P5_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/P5_perind.counts.gz"
P5_clusters <- read.table(P5_clusters_file,header=T)
rownames(P5_clusters) <- P5_clusters$chrom
P5_clusters$juncs <- vapply(rownames(P5_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
P5_clusters_psi <- P5_clusters[rownames(P5_clusters_sig),]
P6_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/P6_perind.counts.gz"
P6_clusters <- read.table(P6_clusters_file,header=T)
rownames(P6_clusters) <- P6_clusters$chrom
P6_clusters$juncs <- vapply(rownames(P6_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
P6_clusters_psi <- P6_clusters[rownames(P6_clusters_sig),]
P7_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/P7_perind.counts.gz"
P7_clusters <- read.table(P7_clusters_file,header=T)
rownames(P7_clusters) <- P7_clusters$chrom
P7_clusters$juncs <- vapply(rownames(P7_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
P7_clusters_psi <- P7_clusters[rownames(P7_clusters_sig),]
juncs_all <- unique(c(C1_clusters$juncs,C2_clusters$juncs,
C3_clusters$juncs,C5_clusters$juncs,
C6_clusters$juncs,C7_clusters$juncs,
P1_clusters$juncs,P2_clusters$juncs,
P3_clusters$juncs,P5_clusters$juncs,
P6_clusters$juncs,P7_clusters$juncs))
all_sample_psi <- data.frame(juncs_all)
rownames(all_sample_psi) <- juncs_all
all_sample_psi$C1 <- NA
all_sample_psi[C1_clusters$juncs,"C1"]<-C1_clusters$C1
all_sample_psi[C1_clusters$juncs,"U1"]<-C1_clusters$U1
all_sample_psi[C1_clusters$juncs,"U2"]<-C1_clusters$U2
all_sample_psi[C1_clusters$juncs,"U3"]<-C1_clusters$U3
all_sample_psi$C2 <- NA
all_sample_psi[C2_clusters$juncs,"C2"]<-C2_clusters$C2
all_sample_psi[C2_clusters$juncs,"U1"]<-C2_clusters$U1
all_sample_psi[C2_clusters$juncs,"U2"]<-C2_clusters$U2
all_sample_psi[C2_clusters$juncs,"U3"]<-C2_clusters$U3
all_sample_psi$C3 <- NA
all_sample_psi[C3_clusters$juncs,"C3"]<-C3_clusters$C3
all_sample_psi[C3_clusters$juncs,"U1"]<-C3_clusters$U1
all_sample_psi[C3_clusters$juncs,"U2"]<-C3_clusters$U2
all_sample_psi[C3_clusters$juncs,"U3"]<-C3_clusters$U3
all_sample_psi$C5 <- NA
all_sample_psi[C5_clusters$juncs,"C5"]<-C5_clusters$C5
all_sample_psi[C5_clusters$juncs,"U1"]<-C5_clusters$U1
all_sample_psi[C5_clusters$juncs,"U2"]<-C5_clusters$U2
all_sample_psi[C5_clusters$juncs,"U3"]<-C5_clusters$U3
all_sample_psi$C6 <- NA
all_sample_psi[C6_clusters$juncs,"C6"]<-C6_clusters$C6
all_sample_psi[C6_clusters$juncs,"U1"]<-C6_clusters$U1
all_sample_psi[C6_clusters$juncs,"U2"]<-C6_clusters$U2
all_sample_psi[C6_clusters$juncs,"U3"]<-C6_clusters$U3
all_sample_psi$C7 <- NA
all_sample_psi[C7_clusters$juncs,"C7"]<-C7_clusters$C7
all_sample_psi[C7_clusters$juncs,"U1"]<-C7_clusters$U1
all_sample_psi[C7_clusters$juncs,"U2"]<-C7_clusters$U2
all_sample_psi[C7_clusters$juncs,"U3"]<-C7_clusters$U3
all_sample_psi$P1 <- NA
all_sample_psi[P1_clusters$juncs,"P1"]<-P1_clusters$P1
all_sample_psi[P1_clusters$juncs,"U1"]<-P1_clusters$U1
all_sample_psi[P1_clusters$juncs,"U2"]<-P1_clusters$U2
all_sample_psi[P1_clusters$juncs,"U3"]<-P1_clusters$U3
all_sample_psi$P2 <- NA
all_sample_psi[P2_clusters$juncs,"P2"]<-P2_clusters$P2
all_sample_psi[P2_clusters$juncs,"U1"]<-P2_clusters$U1
all_sample_psi[P2_clusters$juncs,"U2"]<-P2_clusters$U2
all_sample_psi[P2_clusters$juncs,"U3"]<-P2_clusters$U3
all_sample_psi$P3 <- NA
all_sample_psi[P3_clusters$juncs,"P3"]<-P3_clusters$P3
all_sample_psi[P3_clusters$juncs,"U1"]<-P3_clusters$U1
all_sample_psi[P3_clusters$juncs,"U2"]<-P3_clusters$U2
all_sample_psi[P3_clusters$juncs,"U3"]<-P3_clusters$U3
all_sample_psi$P5 <- NA
all_sample_psi[P5_clusters$juncs,"P5"]<-P5_clusters$P5
all_sample_psi[P5_clusters$juncs,"U1"]<-P5_clusters$U1
all_sample_psi[P5_clusters$juncs,"U2"]<-P5_clusters$U2
all_sample_psi[P5_clusters$juncs,"U3"]<-P5_clusters$U3
all_sample_psi$P6 <- NA
all_sample_psi[P6_clusters$juncs,"P6"]<-P6_clusters$P6
all_sample_psi[P6_clusters$juncs,"U1"]<-P6_clusters$U1
all_sample_psi[P6_clusters$juncs,"U2"]<-P6_clusters$U2
all_sample_psi[P6_clusters$juncs,"U3"]<-P6_clusters$U3
all_sample_psi$P7 <- NA
all_sample_psi[P7_clusters$juncs,"P7"]<-P7_clusters$P7
all_sample_psi[P7_clusters$juncs,"U1"]<-P7_clusters$U1
all_sample_psi[P7_clusters$juncs,"U2"]<-P7_clusters$U2
all_sample_psi[P7_clusters$juncs,"U3"]<-P7_clusters$U3
all_sample_psi<-all_sample_psi[juncs_sig,seq(2,ncol(all_sample_psi))]
Configuring psi matrix to be numeric
cols <- colnames(all_sample_psi)
for (col in cols){
all_sample_psi[,col]<-vapply(all_sample_psi[,col],function(val){
if (is.na(val)){
return(NA)
}
frac<-strsplit(val,"/")[[1]]
frac<-as.numeric(frac)
if (frac[1]==0){
return(0)
}
frac <- frac[1]/frac[2]
},numeric(1))
}
Saving psi and pval matrices
write.table(all_sample_psi,
file="/media/theron/My_Passport/Ali_data/analysis/all_sample_psi.txt",
sep="\t",
quote=F,
col.names=T,
row.names=F)
write.table(all_sample_pvals,
file="/media/theron/My_Passport/Ali_data/analysis/all_sample_pval.txt",
sep="\t",
quote=F,
col.names=T,
row.names=F)
juncs_sig_df <- data.frame(matrix(unlist(strsplit(juncs_sig,":")),nrow=length(juncs_sig),ncol=4,byrow=T))
colnames(juncs_sig_df)<-c("chrom","start","end","strand")
write.table(juncs_sig_df,
file="/media/theron/My_Passport/Ali_data/analysis/juncs_sig.txt",
sep="\t",
quote=F,
col.names=T,
row.names=F)
saveRDS(juncs_sig_df,
file="/media/theron/My_Passport/Ali_data/analysis/juncs_sig.rds")
juncs_all_df <- data.frame(matrix(unlist(strsplit(juncs_all,":")),nrow=length(juncs_all),ncol=4,byrow=T))
colnames(juncs_all_df)<-c("chrom","start","end","strand")
write.table(juncs_all_df,
file="/media/theron/My_Passport/Ali_data/analysis/juncs_all.txt",
sep="\t",
quote=F,
col.names=T,
row.names=F)
C1_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/C1P_pVals.txt"
C1_clusters <- read.table(C1_clusters_file,header=T)
C1_clusters$juncs <- vapply(rownames(C1_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
C1_clusters_sig <- C1_clusters %>% dplyr::filter(C1 <= 0.05)
C2_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/C2P_pVals.txt"
C2_clusters <- read.table(C2_clusters_file,header=T)
C2_clusters$juncs <- vapply(rownames(C2_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
C2_clusters_sig <- C2_clusters %>% dplyr::filter(C2 <= 0.05)
C3_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/C3P_pVals.txt"
C3_clusters <- read.table(C3_clusters_file,header=T)
C3_clusters$juncs <- vapply(rownames(C3_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
C3_clusters_sig <- C3_clusters %>% dplyr::filter(C3 <= 0.05)
C5_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/C5P_pVals.txt"
C5_clusters <- read.table(C5_clusters_file,header=T)
C5_clusters$juncs <- vapply(rownames(C5_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
C5_clusters_sig <- C5_clusters %>% dplyr::filter(C5 <= 0.05)
C6_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/C6P_pVals.txt"
C6_clusters <- read.table(C6_clusters_file,header=T)
C6_clusters$juncs <- vapply(rownames(C6_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
C6_clusters_sig <- C6_clusters %>% dplyr::filter(C6 <= 0.05)
C7_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/C7P_pVals.txt"
C7_clusters <- read.table(C7_clusters_file,header=T)
C7_clusters_sig <- C7_clusters %>% dplyr::filter(C7 <= 0.05)
C7_clusters_sig$juncs <- vapply(rownames(C7_clusters_sig),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
P1_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/P1C_pVals.txt"
P1_clusters <- read.table(P1_clusters_file,header=T)
P1_clusters$juncs <- vapply(rownames(P1_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
P1_clusters_sig <- P1_clusters %>% dplyr::filter(P1 <= 0.05)
P2_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/P2C_pVals.txt"
P2_clusters <- read.table(P2_clusters_file,header=T)
P2_clusters$juncs <- vapply(rownames(P2_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
P2_clusters_sig <- P2_clusters %>% dplyr::filter(P2 <= 0.05)
P3_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/P3C_pVals.txt"
P3_clusters <- read.table(P3_clusters_file,header=T)
P3_clusters$juncs <- vapply(rownames(P3_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
P3_clusters_sig <- P3_clusters %>% dplyr::filter(P3 <= 0.05)
P5_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/P5C_pVals.txt"
P5_clusters <- read.table(P5_clusters_file,header=T)
P5_clusters$juncs <- vapply(rownames(P5_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
P5_clusters_sig <- P5_clusters %>% dplyr::filter(P5 <= 0.05)
P6_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/P6C_pVals.txt"
P6_clusters <- read.table(P6_clusters_file,header=T)
P6_clusters$juncs <- vapply(rownames(P6_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
P6_clusters_sig <- P6_clusters %>% dplyr::filter(P6 <= 0.05)
P7_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/P7C_pVals.txt"
P7_clusters <- read.table(P7_clusters_file,header=T)
P7_clusters$juncs <- vapply(rownames(P7_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
P7_clusters_sig <- P7_clusters %>% dplyr::filter(P7 <= 0.05)
juncs_sig <- unique(c(C1_clusters_sig$juncs,C2_clusters_sig$juncs,
C3_clusters_sig$juncs,C5_clusters_sig$juncs,
C6_clusters_sig$juncs,C7_clusters_sig$juncs,
P1_clusters_sig$juncs,P2_clusters_sig$juncs,
P3_clusters_sig$juncs,P5_clusters_sig$juncs,
P6_clusters_sig$juncs,P7_clusters_sig$juncs))
juncs_all <- unique(c(C1_clusters$juncs,C2_clusters$juncs,
C3_clusters$juncs,C5_clusters$juncs,
C6_clusters$juncs,C7_clusters$juncs,
P1_clusters$juncs,P2_clusters$juncs,
P3_clusters$juncs,P5_clusters$juncs,
P6_clusters$juncs,P7_clusters$juncs))
all_sample_pvals <- data.frame(juncs_all)
rownames(all_sample_pvals) <- juncs_all
all_sample_pvals$C1 <- NA
all_sample_pvals[C1_clusters$juncs,"C1"]<-C1_clusters$C1
all_sample_pvals$C2 <- NA
all_sample_pvals[C2_clusters$juncs,"C2"]<-C2_clusters$C2
all_sample_pvals$C3 <- NA
all_sample_pvals[C3_clusters$juncs,"C3"]<-C3_clusters$C3
all_sample_pvals$C5 <- NA
all_sample_pvals[C5_clusters$juncs,"C5"]<-C5_clusters$C5
all_sample_pvals$C6 <- NA
all_sample_pvals[C6_clusters$juncs,"C6"]<-C6_clusters$C6
all_sample_pvals$C7 <- NA
all_sample_pvals[C7_clusters$juncs,"C7"]<-C7_clusters$C7
all_sample_pvals$P1 <- NA
all_sample_pvals[P1_clusters$juncs,"P1"]<-P1_clusters$P1
all_sample_pvals$P2 <- NA
all_sample_pvals[P2_clusters$juncs,"P2"]<-P2_clusters$P2
all_sample_pvals$P3 <- NA
all_sample_pvals[P3_clusters$juncs,"P3"]<-P3_clusters$P3
all_sample_pvals$P5 <- NA
all_sample_pvals[P5_clusters$juncs,"P5"]<-P5_clusters$P5
all_sample_pvals$P6 <- NA
all_sample_pvals[P6_clusters$juncs,"P6"]<-P6_clusters$P6
all_sample_pvals$P7 <- NA
all_sample_pvals[P7_clusters$juncs,"P7"]<-P7_clusters$P7
all_sample_pvals<-all_sample_pvals[juncs_sig,seq(2,ncol(all_sample_pvals))]
C1_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/C1P_perind.counts.gz"
C1_clusters <- read.table(C1_clusters_file,header=T)
rownames(C1_clusters) <- C1_clusters$chrom
C1_clusters$juncs <- vapply(rownames(C1_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
C1_clusters_psi <- C1_clusters[rownames(C1_clusters_sig),]
C2_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/C2P_perind.counts.gz"
C2_clusters <- read.table(C2_clusters_file,header=T)
rownames(C2_clusters) <- C2_clusters$chrom
C2_clusters$juncs <- vapply(rownames(C2_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
C2_clusters_psi <- C2_clusters[rownames(C2_clusters_sig),]
C3_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/C3P_perind.counts.gz"
C3_clusters <- read.table(C3_clusters_file,header=T)
rownames(C3_clusters) <- C3_clusters$chrom
C3_clusters$juncs <- vapply(rownames(C3_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
C3_clusters_psi <- C3_clusters[rownames(C3_clusters_sig),]
C5_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/C5P_perind.counts.gz"
C5_clusters <- read.table(C5_clusters_file,header=T)
rownames(C5_clusters) <- C5_clusters$chrom
C5_clusters$juncs <- vapply(rownames(C5_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
C5_clusters_psi <- C5_clusters[rownames(C5_clusters_sig),]
C6_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/C6P_perind.counts.gz"
C6_clusters <- read.table(C6_clusters_file,header=T)
rownames(C6_clusters) <- C6_clusters$chrom
C6_clusters$juncs <- vapply(rownames(C6_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
C6_clusters_psi <- C6_clusters[rownames(C6_clusters_sig),]
C7_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/C7P_perind.counts.gz"
C7_clusters <- read.table(C7_clusters_file,header=T)
rownames(C7_clusters) <- C7_clusters$chrom
C7_clusters$juncs <- vapply(rownames(C7_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
C7_clusters_psi <- C7_clusters[rownames(C7_clusters_sig),]
P1_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/P1C_perind.counts.gz"
P1_clusters <- read.table(P1_clusters_file,header=T)
rownames(P1_clusters) <- P1_clusters$chrom
P1_clusters$juncs <- vapply(rownames(P1_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
P1_clusters_psi <- P1_clusters[rownames(P1_clusters_sig),]
P2_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/P2C_perind.counts.gz"
P2_clusters <- read.table(P2_clusters_file,header=T)
rownames(P2_clusters) <- P2_clusters$chrom
P2_clusters$juncs <- vapply(rownames(P2_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
P2_clusters_psi <- P2_clusters[rownames(P2_clusters_sig),]
P3_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/P3C_perind.counts.gz"
P3_clusters <- read.table(P3_clusters_file,header=T)
rownames(P3_clusters) <- P3_clusters$chrom
P3_clusters$juncs <- vapply(rownames(P3_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
P3_clusters_psi <- P3_clusters[rownames(P3_clusters_sig),]
P5_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/P5C_perind.counts.gz"
P5_clusters <- read.table(P5_clusters_file,header=T)
rownames(P5_clusters) <- P5_clusters$chrom
P5_clusters$juncs <- vapply(rownames(P5_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
P5_clusters_psi <- P5_clusters[rownames(P5_clusters_sig),]
P6_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/P6C_perind.counts.gz"
P6_clusters <- read.table(P6_clusters_file,header=T)
rownames(P6_clusters) <- P6_clusters$chrom
P6_clusters$juncs <- vapply(rownames(P6_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
P6_clusters_psi <- P6_clusters[rownames(P6_clusters_sig),]
P7_clusters_file <- "/media/theron/My_Passport/Ali_data/analysis/P7C_perind.counts.gz"
P7_clusters <- read.table(P7_clusters_file,header=T)
rownames(P7_clusters) <- P7_clusters$chrom
P7_clusters$juncs <- vapply(rownames(P7_clusters),function(val){
all_info <- strsplit(val,":")[[1]]
strand <- strsplit(all_info[4],"_")[[1]][3]
paste(c(all_info[seq(3)],strand),collapse=":")
},character(1))
P7_clusters_psi <- P7_clusters[rownames(P7_clusters_sig),]
juncs_all <- unique(c(C1_clusters$juncs,C2_clusters$juncs,
C3_clusters$juncs,C5_clusters$juncs,
C6_clusters$juncs,C7_clusters$juncs,
P1_clusters$juncs,P2_clusters$juncs,
P3_clusters$juncs,P5_clusters$juncs,
P6_clusters$juncs,P7_clusters$juncs))
all_sample_psi <- data.frame(juncs_all)
rownames(all_sample_psi) <- juncs_all
all_sample_psi$C1 <- NA
all_sample_psi[C1_clusters$juncs,"C1"]<-C1_clusters$C1
all_sample_psi$C2 <- NA
all_sample_psi[C2_clusters$juncs,"C2"]<-C2_clusters$C2
all_sample_psi$C3 <- NA
all_sample_psi[C3_clusters$juncs,"C3"]<-C3_clusters$C3
all_sample_psi$C5 <- NA
all_sample_psi[C5_clusters$juncs,"C5"]<-C5_clusters$C5
all_sample_psi$C6 <- NA
all_sample_psi[C6_clusters$juncs,"C6"]<-C6_clusters$C6
all_sample_psi$C7 <- NA
all_sample_psi[C7_clusters$juncs,"C7"]<-C7_clusters$C7
all_sample_psi$P1 <- NA
all_sample_psi[P1_clusters$juncs,"P1"]<-P1_clusters$P1
all_sample_psi$P2 <- NA
all_sample_psi[P2_clusters$juncs,"P2"]<-P2_clusters$P2
all_sample_psi$P3 <- NA
all_sample_psi[P3_clusters$juncs,"P3"]<-P3_clusters$P3
all_sample_psi$P5 <- NA
all_sample_psi[P5_clusters$juncs,"P5"]<-P5_clusters$P5
all_sample_psi$P6 <- NA
all_sample_psi[P6_clusters$juncs,"P6"]<-P6_clusters$P6
all_sample_psi$P7 <- NA
all_sample_psi[P7_clusters$juncs,"P7"]<-P7_clusters$P7
all_sample_psi<-all_sample_psi[juncs_sig,seq(2,ncol(all_sample_psi))]
Normalizing and conglomerating the junction counts per outlier junction
C1_juncs <- read.table("/media/theron/My_Passport/Ali_data/analysis/C1.junc",sep="\t")
C1_juncs$juncs<-sprintf("%s:%s:%s:%s",C1_juncs$V1,C1_juncs$V2,C1_juncs$V3,C1_juncs$V6)
C1_juncs$norm <- (C1_juncs$V5/sum(C1_juncs$V5))*10000
C1_juncs<-vapply(juncs_sig,function(junc){
a<-which(C1_juncs$juncs==junc)
if(length(a)==0){
return(0)
} else {
return(as.numeric(C1_juncs[a,"norm"]))
}
},numeric(1))
names(C1_juncs)<-juncs_sig
C1_juncs<-data.frame(C1_juncs)
C1_juncs$juncs<-rownames(C1_juncs)
C1_juncs <- C1_juncs %>% dplyr::filter(juncs %in% juncs_sig)
C2_juncs <- read.table("/media/theron/My_Passport/Ali_data/analysis/C2.junc",sep="\t")
C2_juncs$juncs<-sprintf("%s:%s:%s:%s",C2_juncs$V1,C2_juncs$V2,C2_juncs$V3,C2_juncs$V6)
C2_juncs$norm <- (C2_juncs$V5/sum(C2_juncs$V5))*10000
C2_juncs<-vapply(juncs_sig,function(junc){
a<-which(C2_juncs$juncs==junc)
if(length(a)==0){
return(0)
} else {
return(as.numeric(C2_juncs[a,"norm"]))
}
},numeric(1))
names(C2_juncs)<-juncs_sig
C2_juncs<-data.frame(C2_juncs)
C2_juncs$juncs<-rownames(C2_juncs)
C2_juncs <- C2_juncs %>% dplyr::filter(juncs %in% juncs_sig)
C3_juncs <- read.table("/media/theron/My_Passport/Ali_data/analysis/C3.junc",sep="\t")
C3_juncs$juncs<-sprintf("%s:%s:%s:%s",C3_juncs$V1,C3_juncs$V2,C3_juncs$V3,C3_juncs$V6)
C3_juncs$norm <- (C3_juncs$V5/sum(C3_juncs$V5))*10000
C3_juncs<-vapply(juncs_sig,function(junc){
a<-which(C3_juncs$juncs==junc)
if(length(a)==0){
return(0)
} else {
return(as.numeric(C3_juncs[a,"norm"]))
}
},numeric(1))
names(C3_juncs)<-juncs_sig
C3_juncs<-data.frame(C3_juncs)
C3_juncs$juncs<-rownames(C3_juncs)
C3_juncs <- C3_juncs %>% dplyr::filter(juncs %in% juncs_sig)
C5_juncs <- read.table("/media/theron/My_Passport/Ali_data/analysis/C5.junc",sep="\t")
C5_juncs$juncs<-sprintf("%s:%s:%s:%s",C5_juncs$V1,C5_juncs$V2,C5_juncs$V3,C5_juncs$V6)
C5_juncs$norm <- (C5_juncs$V5/sum(C5_juncs$V5))*10000
C5_juncs<-vapply(juncs_sig,function(junc){
a<-which(C5_juncs$juncs==junc)
if(length(a)==0){
return(0)
} else {
return(as.numeric(C5_juncs[a,"norm"]))
}
},numeric(1))
names(C5_juncs)<-juncs_sig
C5_juncs<-data.frame(C5_juncs)
C5_juncs$juncs<-rownames(C5_juncs)
C5_juncs <- C5_juncs %>% dplyr::filter(juncs %in% juncs_sig)
C6_juncs <- read.table("/media/theron/My_Passport/Ali_data/analysis/C6.junc",sep="\t")
C6_juncs$juncs<-sprintf("%s:%s:%s:%s",C6_juncs$V1,C6_juncs$V2,C6_juncs$V3,C6_juncs$V6)
C6_juncs$norm <- (C6_juncs$V5/sum(C6_juncs$V5))*10000
C6_juncs<-vapply(juncs_sig,function(junc){
a<-which(C6_juncs$juncs==junc)
if(length(a)==0){
return(0)
} else {
return(as.numeric(C6_juncs[a,"norm"]))
}
},numeric(1))
names(C6_juncs)<-juncs_sig
C6_juncs<-data.frame(C6_juncs)
C6_juncs$juncs<-rownames(C6_juncs)
C6_juncs <- C6_juncs %>% dplyr::filter(juncs %in% juncs_sig)
C7_juncs <- read.table("/media/theron/My_Passport/Ali_data/analysis/C7.junc",sep="\t")
C7_juncs$juncs<-sprintf("%s:%s:%s:%s",C7_juncs$V1,C7_juncs$V2,C7_juncs$V3,C7_juncs$V6)
C7_juncs$norm <- (C7_juncs$V5/sum(C7_juncs$V5))*10000
C7_juncs<-vapply(juncs_sig,function(junc){
a<-which(C7_juncs$juncs==junc)
if(length(a)==0){
return(0)
} else {
return(as.numeric(C7_juncs[a,"norm"]))
}
},numeric(1))
names(C7_juncs)<-juncs_sig
C7_juncs<-data.frame(C7_juncs)
C7_juncs$juncs<-rownames(C7_juncs)
C7_juncs <- C7_juncs %>% dplyr::filter(juncs %in% juncs_sig)
P1_juncs <- read.table("/media/theron/My_Passport/Ali_data/analysis/P1.junc",sep="\t")
P1_juncs$juncs<-sprintf("%s:%s:%s:%s",P1_juncs$V1,P1_juncs$V2,P1_juncs$V3,P1_juncs$V6)
P1_juncs$norm <- (P1_juncs$V5/sum(P1_juncs$V5))*10000
P1_juncs<-vapply(juncs_sig,function(junc){
a<-which(P1_juncs$juncs==junc)
if(length(a)==0){
return(0)
} else {
return(as.numeric(P1_juncs[a,"norm"]))
}
},numeric(1))
names(P1_juncs)<-juncs_sig
P1_juncs<-data.frame(P1_juncs)
P1_juncs$juncs<-rownames(P1_juncs)
P1_juncs <- P1_juncs %>% dplyr::filter(juncs %in% juncs_sig)
P2_juncs <- read.table("/media/theron/My_Passport/Ali_data/analysis/P2.junc",sep="\t")
P2_juncs$juncs<-sprintf("%s:%s:%s:%s",P2_juncs$V1,P2_juncs$V2,P2_juncs$V3,P2_juncs$V6)
P2_juncs$norm <- (P2_juncs$V5/sum(P2_juncs$V5))*10000
P2_juncs<-vapply(juncs_sig,function(junc){
a<-which(P2_juncs$juncs==junc)
if(length(a)==0){
return(0)
} else {
return(as.numeric(P2_juncs[a,"norm"]))
}
},numeric(1))
names(P2_juncs)<-juncs_sig
P2_juncs<-data.frame(P2_juncs)
P2_juncs$juncs<-rownames(P2_juncs)
P2_juncs <- P2_juncs %>% dplyr::filter(juncs %in% juncs_sig)
P3_juncs <- read.table("/media/theron/My_Passport/Ali_data/analysis/P3.junc",sep="\t")
P3_juncs$juncs<-sprintf("%s:%s:%s:%s",P3_juncs$V1,P3_juncs$V2,P3_juncs$V3,P3_juncs$V6)
P3_juncs$norm <- (P3_juncs$V5/sum(P3_juncs$V5))*10000
P3_juncs<-vapply(juncs_sig,function(junc){
a<-which(P3_juncs$juncs==junc)
if(length(a)==0){
return(0)
} else {
return(as.numeric(P3_juncs[a,"norm"]))
}
},numeric(1))
names(P3_juncs)<-juncs_sig
P3_juncs<-data.frame(P3_juncs)
P3_juncs$juncs<-rownames(P3_juncs)
P3_juncs <- P3_juncs %>% dplyr::filter(juncs %in% juncs_sig)
P5_juncs <- read.table("/media/theron/My_Passport/Ali_data/analysis/P5.junc",sep="\t")
P5_juncs$juncs<-sprintf("%s:%s:%s:%s",P5_juncs$V1,P5_juncs$V2,P5_juncs$V3,P5_juncs$V6)
P5_juncs$norm <- (P5_juncs$V5/sum(P5_juncs$V5))*10000
P5_juncs<-vapply(juncs_sig,function(junc){
a<-which(P5_juncs$juncs==junc)
if(length(a)==0){
return(0)
} else {
return(as.numeric(P5_juncs[a,"norm"]))
}
},numeric(1))
names(P5_juncs)<-juncs_sig
P5_juncs<-data.frame(P5_juncs)
P5_juncs$juncs<-rownames(P5_juncs)
P5_juncs <- P5_juncs %>% dplyr::filter(juncs %in% juncs_sig)
P6_juncs <- read.table("/media/theron/My_Passport/Ali_data/analysis/P6.junc",sep="\t")
P6_juncs$juncs<-sprintf("%s:%s:%s:%s",P6_juncs$V1,P6_juncs$V2,P6_juncs$V3,P6_juncs$V6)
P6_juncs$norm <- (P6_juncs$V5/sum(P6_juncs$V5))*10000
P6_juncs<-vapply(juncs_sig,function(junc){
a<-which(P6_juncs$juncs==junc)
if(length(a)==0){
return(0)
} else {
return(as.numeric(P6_juncs[a,"norm"]))
}
},numeric(1))
names(P6_juncs)<-juncs_sig
P6_juncs<-data.frame(P6_juncs)
P6_juncs$juncs<-rownames(P6_juncs)
P6_juncs <- P6_juncs %>% dplyr::filter(juncs %in% juncs_sig)
P7_juncs <- read.table("/media/theron/My_Passport/Ali_data/analysis/P7.junc",sep="\t")
P7_juncs$juncs<-sprintf("%s:%s:%s:%s",P7_juncs$V1,P7_juncs$V2,P7_juncs$V3,P7_juncs$V6)
P7_juncs$norm <- (P7_juncs$V5/sum(P7_juncs$V5))*10000
P7_juncs<-vapply(juncs_sig,function(junc){
a<-which(P7_juncs$juncs==junc)
if(length(a)==0){
return(0)
} else {
return(as.numeric(P7_juncs[a,"norm"]))
}
},numeric(1))
names(P7_juncs)<-juncs_sig
P7_juncs<-data.frame(P7_juncs)
P7_juncs$juncs<-rownames(P7_juncs)
P7_juncs <- P7_juncs %>% dplyr::filter(juncs %in% juncs_sig)
U1_juncs <- read.table("/media/theron/My_Passport/Ali_data/analysis/U1.junc",sep="\t")
U1_juncs$juncs<-sprintf("%s:%s:%s:%s",U1_juncs$V1,U1_juncs$V2,U1_juncs$V3,U1_juncs$V6)
U1_juncs$norm <- (U1_juncs$V5/sum(U1_juncs$V5))*10000
U1_juncs<-vapply(juncs_sig,function(junc){
a<-which(U1_juncs$juncs==junc)
if(length(a)==0){
return(0)
} else {
return(as.numeric(U1_juncs[a,"norm"]))
}
},numeric(1))
names(U1_juncs)<-juncs_sig
U1_juncs<-data.frame(U1_juncs)
U1_juncs$juncs<-rownames(U1_juncs)
U1_juncs <- U1_juncs %>% dplyr::filter(juncs %in% juncs_sig)
U2_juncs <- read.table("/media/theron/My_Passport/Ali_data/analysis/U2.junc",sep="\t")
U2_juncs$juncs<-sprintf("%s:%s:%s:%s",U2_juncs$V1,U2_juncs$V2,U2_juncs$V3,U2_juncs$V6)
U2_juncs$norm <- (U2_juncs$V5/sum(U2_juncs$V5))*10000
U2_juncs<-vapply(juncs_sig,function(junc){
a<-which(U2_juncs$juncs==junc)
if(length(a)==0){
return(0)
} else {
return(as.numeric(U2_juncs[a,"norm"]))
}
},numeric(1))
names(U2_juncs)<-juncs_sig
U2_juncs<-data.frame(U2_juncs)
U2_juncs$juncs<-rownames(U2_juncs)
U2_juncs <- U2_juncs %>% dplyr::filter(juncs %in% juncs_sig)
U3_juncs <- read.table("/media/theron/My_Passport/Ali_data/analysis/U3.junc",sep="\t")
U3_juncs$juncs<-sprintf("%s:%s:%s:%s",U3_juncs$V1,U3_juncs$V2,U3_juncs$V3,U3_juncs$V6)
U3_juncs$norm <- (U3_juncs$V5/sum(U3_juncs$V5))*10000
U3_juncs<-vapply(juncs_sig,function(junc){
a<-which(U3_juncs$juncs==junc)
if(length(a)==0){
return(0)
} else {
return(as.numeric(U3_juncs[a,"norm"]))
}
},numeric(1))
names(U3_juncs)<-juncs_sig
U3_juncs<-data.frame(U3_juncs)
U3_juncs$juncs<-rownames(U3_juncs)
U3_juncs <- U3_juncs %>% dplyr::filter(juncs %in% juncs_sig)
all_sample_counts_norm <- cbind(C1_juncs$C1_juncs,
C2_juncs$C2_juncs,
C3_juncs$C3_juncs,
C5_juncs$C5_juncs,
C6_juncs$C6_juncs,
C7_juncs$C7_juncs,
P1_juncs$P1_juncs,
P2_juncs$P2_juncs,
P3_juncs$P3_juncs,
P5_juncs$P5_juncs,
P6_juncs$P6_juncs,
P7_juncs$P7_juncs,
U1_juncs$U1_juncs,
U2_juncs$U2_juncs,
U3_juncs$U3_juncs,
data.frame(juncs_sig))
colnames(all_sample_counts_norm)<-c("C1_norm","C2_norm","C3_norm","C5_norm","C6_norm","C7_norm",
"P1_norm","P2_norm","P3_norm","P5_norm","P6_norm","P7_norm",
"U1_norm","U2_norm","U3_norm","juncs")
Configuring psi matrix to be numeric
cols <- colnames(all_sample_psi)
for (col in cols){
all_sample_psi[,col]<-vapply(all_sample_psi[,col],function(val){
if (is.na(val)){
return(NA)
}
frac<-strsplit(val,"/")[[1]]
frac<-as.numeric(frac)
if (frac[1]==0){
return(0)
}
frac <- frac[1]/frac[2]
},numeric(1))
}
Saving psi and pval matrices
write.table(all_sample_psi,
file="/media/theron/My_Passport/Ali_data/analysis/all_sample_psi_CP_PC.txt",
sep="\t",
quote=F,
col.names=T,
row.names=F)
write.table(all_sample_pvals,
file="/media/theron/My_Passport/Ali_data/analysis/all_sample_pval_CP_PC.txt",
sep="\t",
quote=F,
col.names=T,
row.names=F)
write.table(all_sample_counts_norm,
file="/media/theron/My_Passport/Ali_data/analysis/all_sample_counts_norm_CP_PC.txt",
sep="\t",
quote=F,
col.names=T,
row.names=F)
juncs_sig_df <- data.frame(matrix(unlist(strsplit(juncs_sig,":")),nrow=length(juncs_sig),ncol=4,byrow=T))
colnames(juncs_sig_df)<-c("chrom","start","end","strand")
write.table(juncs_sig_df,
file="/media/theron/My_Passport/Ali_data/analysis/juncs_sig_CP_PC.txt",
sep="\t",
quote=F,
col.names=T,
row.names=F)
saveRDS(juncs_sig_df,
file="/media/theron/My_Passport/Ali_data/analysis/juncs_sig_CP_PC.rds")
juncs_all_df <- data.frame(matrix(unlist(strsplit(juncs_all,":")),nrow=length(juncs_all),ncol=4,byrow=T))
colnames(juncs_all_df)<-c("chrom","start","end","strand")
write.table(juncs_all_df,
file="/media/theron/My_Passport/Ali_data/analysis/juncs_all_CP_PC.txt",
sep="\t",
quote=F,
col.names=T,
row.names=F)
txdb_file<-"/media/theron/My_Passport/reference_genomes/GTF_GFF/ENSEMBL/Mus_musculus.GRCm38.102.chr.sqlite"
txdb<-loadDb(txdb_file) # making the txdb from gtf
introns_by_tx<-data.frame(unlist(intronsByTranscript(txdb,use.names=T)))
juncs_test<-sprintf("%s:%s:%s:%s",introns_by_tx$seqnames,introns_by_tx$start-1,introns_by_tx$end+1,introns_by_tx$strand)
introns_by_tx$juncs <- juncs_test
C1_SJ<-read.table("/media/theron/My_Passport/Ali_data/analysis/C1SJ.out.tab")
C1_SJ_strand <- vapply(C1_SJ$V4,function(val){
if (val == 0){
return("*")
} else if (val == 1){
return("+")
} else if (val == 2){
return("-")
}
},character(1))
juncs<-sprintf("%s:%s:%s:%s",C1_SJ$V1,C1_SJ$V2,C1_SJ$V3,C1_SJ_strand)
annotated_outlier_junctions<-juncs_all %in% introns_by_tx$juncs
table(annotated_outlier_junctions)
## annotated_outlier_junctions
## FALSE TRUE
## 1571 8821
maf_file <- "/media/theron/My_Passport/Ali_data/WES Data for 4T1MIS/MB01JHU503/MB01JHU503_000_analysis/MAF/DNA_4TIMSI_haplotypecaller.maf"
ali_maf = read.maf(maf_file)
## -Reading
## -Validating
## -Silent variants: 817928
## -Summarizing
## -Processing clinical data
## --Missing clinical data
## -Finished in 5.713s elapsed (14.3s cpu)
ali_maf_df <- data.frame(ali_maf@data)
ali_maf_df_miss <- ali_maf_df %>% dplyr::filter(Variant_Type == "SNP" & Variant_Classification=="Missense_Mutation")
transcripts <- lapply(ali_maf_df_miss$all_effects,function(val){
val_split <- strsplit(val,",")[[1]]
val_split[str_detect(val_split,"ENSMUST")]
})
refseq_transcripts <- lapply(ali_maf_df_miss$all_effects,function(val){
val_split <- strsplit(val,",")[[1]]
loc <- which(str_detect(val_split,"ENSMUST"))
unlist(lapply(strsplit(val_split[loc+1],";"),function(val){return(val[1])}))
})
ens_refseq<-data.frame(unlist(transcripts),unlist(refseq_transcripts))
colnames(ens_refseq)<-c("ensembl","refseq")
ens_refseq<-unique(ens_refseq)
source("/media/theron/My_Passport/splicemute/R/functions.R")
locate_mutation_in_cds <- function(cds_tx_sort,mutation_chr,
mutation_start,mutation_end){
width <- running_sum(cds_tx_sort$width)
for (i in seq(nrow(cds_tx_sort))){
if(mutation_end <= cds_tx_sort[i,"end"] & mutation_end >= cds_tx_sort[i,"start"]){
return(width[i] - (cds_tx_sort[i,"end"]-mutation_end))
}
}
return(-1)
}
extract_portion <- function(trans_str,mut_loc_prot){
trans_str_split <- strsplit(trans_str,"")[[1]]
trans_str_split[mut_loc_prot] <- tolower(trans_str_split[mut_loc_prot])
start <- mut_loc_prot - 8
if (start < 1){
start<-1
}
end <- mut_loc_prot + 8
if (end > nchar(trans_str)){
end <- nchar(trans_str)
}
return(paste(trans_str_split[seq(start,end)],collapse=""))
}
conjugate <- function(nuc){
if (nuc == "A"){
return("T")
} else if (nuc == "T"){
return("A")
} else if (nuc == "G"){
return("C")
} else if (nuc == "C"){
return("G")
}
}
BSgenome <- BSgenome.Mmusculus.GENCODE.GRCm38.102
txdb<-loadDb("/media/theron/My_Passport/reference_genomes/GTF_GFF/ENSEMBL/Mus_musculus.GRCm38.102.chr.sqlite")
cds_by_tx<-cdsBy(txdb,by="tx",use.names=T)
protein_coding_txs <- names(cds_by_tx)
tots<-nrow(ali_maf_df_miss)
ali_maf_dat_muts <- as.data.frame(t(vapply(seq(tots),function(i){
if (i %% 100 == 0){print(sprintf("%d out of%d",i,tots))}
txs <- transcripts[[i]]
tx_record <- c()
mut_loc <- vapply(txs,function(val){
tx_record <<-c(tx_record,val)
if (val %in% protein_coding_txs){
cds_tx <- cds_by_tx[[val]]
cds_tx_sort <- as.data.frame(sort(cds_tx))
mutation_chr <- ali_maf_df_miss$Chromosome[i]
mutation_start <- ali_maf_df_miss$Start_Position[i]
mutation_end <- ali_maf_df_miss$End_Position[i]
mutation_strand <- ali_maf_df_miss$Strand[i]
mutation_allele <- ali_maf_df_miss$Allele[i]
mutation_allele_norm <- ali_maf_df_miss$Reference_Allele[i]
mut_loc<-locate_mutation_in_cds(cds_tx_sort,mutation_chr,
mutation_start,mutation_end)
sequence<-getSeq(BSgenome,sort(cds_tx))
sequ<-paste(as.character(sequence[1:length(sequence)]), collapse="")
if(!check_tx(sequ)){return("untrans")}
if (mut_loc == -1){
return("outside_cds")
}
sequ_split<-strsplit(sequ,"")[[1]]
if (mutation_strand != cds_tx_sort$strand[1]){
sequ_split[mut_loc]<-conjugate(mutation_allele)
sequ_mut <- paste(sequ_split,collapse="")
} else if (mutation_strand == cds_tx_sort$strand[1]){
sequ_split[mut_loc]<-mutation_allele
sequ_mut <- paste(sequ_split,collapse="")
}
trans<-translate(DNAStringSet(sequ_mut),no.init.codon = F)
mut_loc_prot <- floor(mut_loc/3)
trans_str <- as.character(trans)
portion <- extract_portion(trans_str,mut_loc_prot)
return(portion)
} else {
return("outside_cds")
}
},character(1))
mut_loc_names <- paste(names(mut_loc),collapse=":")
mut_loc_vals <- paste(unname(mut_loc),collapse=":")
return(c(mut_loc_names,mut_loc_vals))
},character(2))))
write.table(ali_maf_dat_muts,
file="/media/theron/My_Passport/Ali_data/analysis/ali_maf_muts.txt",
sep="\t",
quote=F,
col.names=T,
row.names=F)
saveRDS(ali_maf_dat_muts,file="/media/theron/My_Passport/Ali_data/analysis/ali_maf_muts.rds")
ali_maf_dat_muts <- read.table("/media/theron/My_Passport/Ali_data/analysis/ali_maf_muts.txt",header = T)
sequences<-lapply(seq(nrow(ali_maf_dat_muts)),function(row_val){
transcripts<-strsplit(ali_maf_dat_muts$V1[row_val],":")[[1]]
sequences<-strsplit(ali_maf_dat_muts$V2[row_val],":")[[1]]
keep<-!(sequences %in% c("outside_cds","untrans"))
if (!any(keep)){return(NA)}
tx<-transcripts[keep]
seq<-sequences[keep]
names(seq)<-sprintf("%s_%d",tx,rep(row_val,length(which(keep))))
return(seq)
})
sequences<-unlist(sequences)
sequences<-sequences[!is.na(sequences)]
tx_name <- names(sequences)
sequences <- vapply(sequences,function(seq){
seq<-toupper(seq)
seq<-str_replace_all(seq,"[*]","")
},character(1))
names(sequences)<-seq(length(tx_name))
sequences<-AAStringSet(sequences)
out_fasta<-"/media/theron/My_Passport/Ali_data/analysis/ali_maf_muts.fa"
writeXStringSet(sequences,out_fasta)
saveRDS(tx_name,file="/media/theron/My_Passport/Ali_data/analysis/tx_names.rds")
breaks<-seq(1,length(sequences),5000)
for (i in seq(length(breaks))){
if (i == length(breaks)){
sequences_small<-AAStringSet(sequences[seq(breaks[i],length(sequences))])
out_fasta<-sprintf("/media/theron/My_Passport/Ali_data/analysis/ali_maf_muts_%d.fa",i)
writeXStringSet(sequences_small,out_fasta,append=FALSE)
} else {
sequences_small<-AAStringSet(sequences[seq(breaks[i],breaks[i+1]-1)])
out_fasta<-sprintf("/media/theron/My_Passport/Ali_data/analysis/ali_maf_muts_%d.fa",i)
writeXStringSet(sequences_small,out_fasta,append=FALSE)
}
}
mut1<-read.table("/media/theron/My_Passport/Ali_data/analysis/immunogenicity/mut1_NetMHC.txt",header=T)
mut2<-read.table("/media/theron/My_Passport/Ali_data/analysis/immunogenicity/mut2_NetMHC.txt",header=T)
mut3<-read.table("/media/theron/My_Passport/Ali_data/analysis/immunogenicity/mut3_NetMHC.txt",header=T)
mut4<-read.table("/media/theron/My_Passport/Ali_data/analysis/immunogenicity/mut4_NetMHC.txt",header=T)
mut_imm <- rbind(mut1,mut2,mut3,mut4)
mut_imm_binding <- mut_imm %>% dplyr::filter(N_binders>0)
tx_names<-readRDS("/media/theron/My_Passport/Ali_data/analysis/tx_names.rds")
tx_names<-data.frame(t(vapply(tx_names,function(tx){str_split(tx,"_")[[1]]},character(2))))
mut_imm_binding$tx<-NA
mut_imm_binding$row_val<-NA
mut_imm_binding[,c("tx","row_val")]<-tx_names[mut_imm_binding$ID,c("X1","X2")]
RSEM_transcripts <- read.table("/media/theron/My_Passport/Ali_data/HTMBmice_RNAseq_Data/MB01JHU504/MB01JHU504_000_analysis/RSEM/tables/isoforms/isoforms_tpm_all_samples_norm.txt",header=T)
rownames(RSEM_transcripts) <- RSEM_transcripts$transcript_id
refseq_val <- vapply(mut_imm_binding$tx,function(ID){
a <- which(ens_refseq$ensembl==ID)
if (length(a) == 0){
return("-1")
} else {
refseq_val<-ens_refseq$refseq[a[1]]
return(substr(refseq_val,1,nchar(refseq_val)-2))
}
},character(1))
N_binders<-mut_imm_binding$N_binders
row_val<-mut_imm_binding$row_val
RSEM_refseq<-cbind(RSEM_transcripts[unname(refseq_val),],N_binders,row_val)
RSEM_refseq_complete <- RSEM_refseq[!is.na(RSEM_refseq$transcript_id),]
C_mut_av <- RSEM_refseq_complete[,seq(3,8)]
C_mut_vals<-vapply(seq(nrow(C_mut_av)),function(row_val){
mean(as.numeric(C_mut_av[row_val,]),na.rm = T)
},numeric(1))
P_mut_av <- RSEM_refseq_complete[,seq(9,14)]
P_mut_vals<-vapply(seq(nrow(P_mut_av)),function(row_val){
mean(as.numeric(P_mut_av[row_val,]),na.rm = T)
},numeric(1))
U_mut_av <- RSEM_refseq_complete[,seq(15,17)]
U_mut_vals<-vapply(seq(nrow(U_mut_av)),function(row_val){
mean(as.numeric(U_mut_av[row_val,]),na.rm = T)
},numeric(1))
RSEM_refseq_average <- data.frame(C_mut_vals,
P_mut_vals,
U_mut_vals,
RSEM_refseq_complete$N_binders,
RSEM_refseq_complete$transcript_id,
RSEM_refseq_complete$gene_id,
RSEM_refseq_complete$row_val)
colnames(RSEM_refseq_average)<-c("C_TPM_AV","P_TPM_AV","U_TPM_AV",
"N_binders","transcript_id","gene_id",
"row_val")
RSEM_refseq_average_linear <- data.frame(c(RSEM_refseq_average$C_TPM_AV,RSEM_refseq_average$P_TPM_AV,RSEM_refseq_average$U_TPM_AV),
c(rep("combo",nrow(RSEM_refseq_average)),rep("pd1",nrow(RSEM_refseq_average)),rep("untreated",nrow(RSEM_refseq_average))),
rep(RSEM_refseq_average$transcript_id,3),rep(RSEM_refseq_average$gene_id,3),
rep(RSEM_refseq_average$row_val,3),rep(RSEM_refseq_average$N_binders,3))
colnames(RSEM_refseq_average_linear)<-c("TPM","arm","transcript_id","gene_id","row_val","N_binders")
RSEM_refseq_average_linear$unique_id<-sprintf("%s_%s",RSEM_refseq_average$transcript_id,RSEM_refseq_average$row_val)
my_comparisons <- list( c("combo", "pd1"), c("combo", "untreated"), c("pd1", "untreated") )
ggplot(RSEM_refseq_average_linear,aes(x=arm,y=log10(TPM)))+geom_violin(aes(fill=arm))+
stat_compare_means(comparisons = my_comparisons)
ggplot(RSEM_refseq_average_linear,aes(x=arm,y=log10(TPM),group=unique_id,color=unique_id))+
geom_point(size=0.5)+geom_line(size=0.1)+
theme(legend.position = "none")
ggplot(RSEM_refseq_average_linear,aes(x=unique_id,y=log10(TPM),group=arm,color=arm))+
geom_point(shape=1)+geom_line()+
theme(legend.position = "none",
axis.text.x=element_blank(),
axis.ticks.x=element_blank())
RSEM_refseq_average$unique_id<-sprintf("%s_%s",RSEM_refseq_average$transcript_id,RSEM_refseq_average$row_val)
RSEM_refseq_average$expression_order <- vapply(seq(nrow(RSEM_refseq_average)),function(row_val){
vals<-RSEM_refseq_average[row_val, c("C_TPM_AV","P_TPM_AV","U_TPM_AV")]
if (vals$C_TPM_AV < vals$P_TPM_AV & vals$P_TPM_AV < vals$U_TPM_AV){
return(c("combo<pd1<untreated"))
} else if (vals$C_TPM_AV < vals$U_TPM_AV & vals$U_TPM_AV < vals$P_TPM_AV) {
return("combo<untreated<pd1")
} else if (vals$P_TPM_AV < vals$C_TPM_AV & vals$C_TPM_AV < vals$U_TPM_AV){
return("pd1<combo<untreated")
} else if (vals$P_TPM_AV < vals$U_TPM_AV & vals$U_TPM_AV < vals$C_TPM_AV){
return("pd1<untreated<combo")
} else if (vals$U_TPM_AV < vals$P_TPM_AV & vals$P_TPM_AV < vals$C_TPM_AV) {
return("untreated<pd1<combo")
} else if (vals$U_TPM_AV < vals$C_TPM_AV & vals$C_TPM_AV < vals$P_TPM_AV) {
return("untreated<combo<pd1")
} else {
return("something's equal")
}
},character(1))
ggplot(RSEM_refseq_average,aes(x=expression_order,fill=expression_order))+
geom_bar(aes(y=(..count..)/sum(..count..)))+
theme(axis.text.x=element_blank())
out_splice<-read.table("/media/theron/My_Passport/Ali_data/analysis/immunogenicity/outlier_splicing_NetMHC_results.txt",header=T)
out_splice <- out_splice %>% dplyr::filter(N_binders>0)
splicemutr_dat <- read.table("/media/theron/My_Passport/Ali_data/analysis/formed_transcripts/data_splicemutr.txt",header=T)
juncs <- sprintf("%s:%s:%s:%s",splicemutr_dat$chr,splicemutr_dat$start,splicemutr_dat$end,splicemutr_dat$cluster)
splicemutr_dat$juncs <- juncs
txdb_file<-"/media/theron/My_Passport/reference_genomes/GTF_GFF/ENSEMBL/Mus_musculus.GRCm38.102.chr.sqlite"
txdb<-loadDb(txdb_file) # making the txdb from gtf
introns_by_tx<-data.frame(unlist(intronsByTranscript(txdb,use.names=T)))
juncs_test<-sprintf("%s:%s:%s:%s",introns_by_tx$seqnames,introns_by_tx$start-1,introns_by_tx$end+1,introns_by_tx$strand)
introns_by_tx$juncs <- juncs_test
splicemutr_dat$juncs
## [1] "12:113422730:113429468:-" "12:113422730:113429468:-"
## [3] "2:98666730:98666906:-" "2:102828610:102853015:-"
## [5] "2:102828610:102853015:-" "2:102828610:102853015:-"
## [7] "2:102828610:102853015:-" "2:102828610:102853015:-"
## [9] "2:102828610:102853015:-" "2:102828610:102853015:-"
## [11] "2:102828610:102853015:-" "2:35282601:35283905:+"
## [13] "2:35282601:35283905:+" "13:22036003:22043362:+"
## [15] "17:34150825:34151085:+" "17:34150825:34151085:+"
## [17] "17:34150825:34151085:+" "12:113359987:113360100:-"
## [19] "12:113359987:113360100:-" "12:113422730:113429781:-"
## [21] "12:113422730:113429781:-" "7:142505530:142507750:+"
## [23] "7:142505530:142507750:+" "7:142505530:142507750:+"
## [25] "7:142505530:142507750:+" "7:142505530:142507750:+"
## [27] "7:142505530:142507750:+" "7:142505530:142507750:+"
## [29] "7:142505530:142507750:+" "7:142505530:142507750:+"
## [31] "6:70722954:70726435:+" "6:70722954:70726435:+"
## [33] "6:70722954:70726435:+" "12:113422730:113428514:-"
## [35] "12:113422730:113428514:-" "2:98666905:98667158:-"
## [37] "5:105031319:105103769:-" "7:142507788:142508335:+"
## [39] "7:142507788:142508335:+" "7:142507788:142508335:+"
## [41] "7:142507788:142508335:+" "7:142507788:142508335:+"
## [43] "7:142507788:142508335:+" "7:142507788:142508335:+"
## [45] "17:34950475:34951885:-" "17:34950475:34951885:-"
## [47] "17:34950475:34951885:-" "17:34950475:34951885:-"
## [49] "12:113330523:113429085:-" "12:113330523:113429085:-"
## [51] "7:142504065:142505516:+" "7:142504065:142505516:+"
## [53] "7:142504065:142505516:+" "7:142504065:142505516:+"
## [55] "7:142504065:142505516:+" "7:142504065:142505516:+"
## [57] "7:142504065:142505516:+" "7:142504065:142505516:+"
## [59] "7:142504065:142505516:+" "7:142504065:142505516:+"
## [61] "7:142504065:142505516:+" "7:142504065:142505516:+"
## [63] "7:142504065:142505516:+" "7:142504065:142505516:+"
## [65] "7:142504065:142505516:+"
annotated_outlier_junctions<-splicemutr_dat$juncs %in% introns_by_tx$juncs
splicemutr_dat$verdict<-"non_ann"
splicemutr_dat$verdict[annotated_outlier_junctions]<-"ann"
table(splicemutr_dat$verdict)
##
## ann non_ann
## 50 15
splicemutr_dat_filt <- splicemutr_dat %>% dplyr::filter(!(verdict == "ann" & modified == "changed"))
all_sample_psi<-read.table("/media/theron/My_Passport/Ali_data/analysis/all_sample_psi.txt",header=T)
all_sample_pvals <- read.table("/media/theron/My_Passport/Ali_data/analysis/all_sample_pval.txt",header=T)
juncs_sig <- read.table("/media/theron/My_Passport/Ali_data/analysis/juncs_sig.txt",header=T)
juncs_sig<-sprintf("%s:%s:%s:%s",juncs_sig$chrom,juncs_sig$start,juncs_sig$end,juncs_sig$strand)
rownames(all_sample_psi)<-juncs_sig
proteins<-unique(out_splice$ID)
protein_imm_dat<-vapply(proteins,function(prot){
out_splice_small <- out_splice %>% dplyr::filter(ID==prot)
sum(out_splice_small$N_binders)
},numeric(1))
prot_imm_dat<-data.frame(proteins,protein_imm_dat)
juncs<-splicemutr_dat$juncs[as.numeric(str_replace_all(proteins,"protein",""))]
prot_imm_dat$juncs<-juncs
C_psi <- all_sample_psi[prot_imm_dat$juncs,c(1,5,6,7,8,9)]
prot_imm_dat$C_psi_av<-vapply(seq(nrow(C_psi)),function(row_val){
mean(as.numeric(C_psi[row_val,]),na.rm = T)
},numeric(1))
P_psi <- all_sample_psi[prot_imm_dat$juncs,c(10,11,12,13,14,15)]
prot_imm_dat$P_psi_av<-vapply(seq(nrow(P_psi)),function(row_val){
mean(as.numeric(P_psi[row_val,]),na.rm = T)
},numeric(1))
U_psi <- all_sample_psi[prot_imm_dat$juncs,c(2,3,4)]
prot_imm_dat$U_psi_av<-vapply(seq(nrow(U_psi)),function(row_val){
mean(as.numeric(U_psi[row_val,]),na.rm = T)
},numeric(1))
prot_imm_dat$P_psi_av[is.nan(prot_imm_dat$P_psi_av)]<-0
prot_imm_dat$C_psi_av[is.nan(prot_imm_dat$C_psi_av)]<-0
prot_imm_dat$U_psi_av[is.nan(prot_imm_dat$U_psi_av)]<-0
prot_imm_dat$psi_order <- vapply(seq(nrow(prot_imm_dat)),function(row_val){
vals<-prot_imm_dat[row_val,c("P_psi_av","C_psi_av","U_psi_av")]
if (vals$C_psi_av < vals$P_psi_av & vals$P_psi_av < vals$U_psi_av){
return("combo<pd1<untreated")
} else if (vals$C_psi_av < vals$U_psi_av & vals$U_psi_av < vals$P_psi_av){
return("combo<untreated<pd1")
} else if (vals$P_psi_av < vals$C_psi_av & vals$C_psi_av < vals$P_psi_av){
return("pd1<combo<untreated")
} else if (vals$P_psi_av < vals$U_psi_av & vals$U_psi_av < vals$C_psi_av){
return("pd1<untreated<combo")
} else if (vals$U_psi_av < vals$C_psi_av & vals$C_psi_av < vals$P_psi_av){
return("untreated<combo<pd1")
} else if (vals$U_psi_av < vals$P_psi_av & vals$P_psi_av < vals$C_psi_av){
return("untreated<pd1<combo")
} else {
return("something's equal")
}
},character(1))
prot_imm_dat_linear <- data.frame(rep(prot_imm_dat$proteins,3),rep(prot_imm_dat$protein_imm_dat,3),
c(prot_imm_dat$C_psi_av,prot_imm_dat$P_psi_av,prot_imm_dat$U_psi_av),
c(rep("C_psi_av",nrow(prot_imm_dat)),rep("P_psi_av",nrow(prot_imm_dat)),rep("U_psi_av",nrow(prot_imm_dat))))
colnames(prot_imm_dat_linear)<- c("protein","Imm_count","Mean_psi","Treatment")
ggplot(prot_imm_dat_linear,aes(y=Mean_psi,x=protein))+geom_point(aes(color=Treatment))+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
my_comparisons <- list( c("C_psi_av", "P_psi_av"), c("C_psi_av", "U_psi_av"), c("P_psi_av", "U_psi_av") )
ggplot(prot_imm_dat_linear,aes(y=Mean_psi,x=Treatment))+geom_violin(aes(fill=Treatment))+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
stat_compare_means(comparisons = my_comparisons)
ggplot(prot_imm_dat_linear,aes(y=Mean_psi,x=Treatment,group=protein,color=protein))+
geom_point()+geom_line()+theme(legend.position = "none")
ggplot(prot_imm_dat_linear,aes(y=Mean_psi,x=log2(Imm_count+1)))+geom_point(aes(color=Treatment))+
geom_smooth(method = "lm")+
stat_cor(method = "spearman")
ggplot(prot_imm_dat,aes(x=psi_order,fill=psi_order))+
geom_bar(aes(y=(..count..)/sum(..count..)))+
theme(axis.text.x=element_blank())
ggplot(prot_imm_dat,aes(x=psi_order,fill=psi_order))+
geom_bar()+
theme(axis.text.x=element_blank())
out_splice<-read.table("/media/theron/My_Passport/Ali_data/analysis/immunogenicity/outlier_splicing_results_CP_PC_NetMHC.txt",header=T)
out_splice <- out_splice %>% dplyr::filter(N_binders>0)
splicemutr_dat <- read.table("/media/theron/My_Passport/Ali_data/analysis/formed_transcripts_CP_PC/data_splicemutr.txt",header=T)
juncs <- sprintf("%s:%s:%s:%s",splicemutr_dat$chr,splicemutr_dat$start,splicemutr_dat$end,splicemutr_dat$cluster)
splicemutr_dat$juncs <- juncs
txdb_file<-"/media/theron/My_Passport/reference_genomes/GTF_GFF/ENSEMBL/Mus_musculus.GRCm38.102.chr.sqlite"
txdb<-loadDb(txdb_file) # making the txdb from gtf
introns_by_tx<-data.frame(unlist(intronsByTranscript(txdb,use.names=T)))
juncs_test<-sprintf("%s:%s:%s:%s",introns_by_tx$seqnames,introns_by_tx$start-1,introns_by_tx$end+1,introns_by_tx$strand)
introns_by_tx$juncs <- juncs_test
splicemutr_dat$juncs
## [1] "1:173936639:173937742:-" "1:173936639:173937742:-"
## [3] "1:173936639:173937742:-" "1:173936639:173937742:-"
## [5] "1:173936639:173937742:-" "1:173936639:173937742:-"
## [7] "1:173936639:173937742:-" "17:36109732:36109860:-"
## [9] "2:35282601:35283905:+" "2:35282601:35283905:+"
## [11] "2:102828610:102853015:-" "2:102828610:102853015:-"
## [13] "2:102828610:102853015:-" "2:102828610:102853015:-"
## [15] "2:102828610:102853015:-" "2:102828610:102853015:-"
## [17] "2:102828610:102853015:-" "2:102828610:102853015:-"
## [19] "4:123554365:123572617:-" "1:63170914:63176586:-"
## [21] "1:63170914:63176586:-" "1:63170914:63176586:-"
## [23] "1:63170914:63176586:-" "1:63170914:63176586:-"
## [25] "1:63170914:63176586:-" "14:105132007:105140316:-"
## [27] "14:105132007:105140316:-" "14:105132007:105140316:-"
## [29] "14:105132007:105140316:-" "17:35264114:35380637:+"
## [31] "17:35264114:35380637:+" "17:35264114:35380637:+"
## [33] "17:35264114:35380637:+" "7:80313131:80315114:+"
## [35] "7:80313131:80315114:+" "7:80313131:80315114:+"
## [37] "7:80313131:80315114:+" "7:80313131:80315114:+"
## [39] "7:80313131:80315114:+" "7:80313131:80315114:+"
## [41] "7:141489743:141491572:+" "7:141489743:141491572:+"
## [43] "7:141489743:141491572:+" "7:141489743:141491572:+"
## [45] "7:141489743:141491572:+" "1:90816535:90830143:-"
## [47] "1:90816535:90830143:-" "1:90816535:90830143:-"
## [49] "1:90816535:90830143:-" "12:69159473:69159530:+"
## [51] "13:22036003:22043362:+" "17:34150825:34151085:+"
## [53] "17:34150825:34151085:+" "17:34150825:34151085:+"
## [55] "19:37968452:37972175:-" "19:37968452:37972175:-"
## [57] "19:37968452:37972175:-" "19:37968452:37972175:-"
## [59] "5:108065965:108080825:+" "5:108065965:108080825:+"
## [61] "5:108065965:108080825:+" "5:108065965:108080825:+"
## [63] "5:108065965:108080825:+" "5:108065965:108080825:+"
## [65] "5:108065965:108080825:+" "5:108065965:108080825:+"
## [67] "5:108065965:108080825:+" "7:28766867:28771811:+"
## [69] "7:28766867:28771811:+" "5:105031319:105103769:-"
## [71] "9:75194137:75197643:+" "9:75194137:75197643:+"
## [73] "9:75194137:75197643:+" "9:75194137:75197643:+"
## [75] "9:75194137:75197643:+" "9:75194137:75197643:+"
## [77] "9:75194137:75197643:+" "11:29708770:29733633:+"
## [79] "11:29708770:29733633:+" "12:113422730:113429781:-"
## [81] "12:113422730:113429781:-" "17:35266514:35266935:+"
## [83] "17:35266514:35266935:+" "17:35383376:35428383:+"
## [85] "17:35383376:35428383:+" "19:11520485:11521671:+"
## [87] "19:11520485:11521671:+" "19:11520485:11521671:+"
## [89] "19:11520485:11521671:+" "19:11520485:11570765:+"
## [91] "19:11520485:11570765:+" "19:11520485:11570765:+"
## [93] "19:11520485:11570765:+" "4:63976568:64006246:-"
## [95] "4:63976568:64006246:-" "4:63976568:64006246:-"
## [97] "5:105135190:105136839:-" "5:105135190:105136839:-"
## [99] "5:105135190:105136839:-" "10:128491033:128492059:-"
## [101] "10:128491033:128492059:-" "10:128491033:128492059:-"
## [103] "10:128491033:128492059:-" "10:128491033:128492059:-"
## [105] "10:128491033:128492059:-" "10:128491033:128492059:-"
## [107] "10:128491033:128492059:-" "10:128491033:128492059:-"
## [109] "6:70722954:70726435:+" "6:70722954:70726435:+"
## [111] "6:70722954:70726435:+" "1:162675841:162676691:-"
## [113] "1:162675841:162676691:-" "1:162675841:162676691:-"
## [115] "1:170143463:170149271:-" "1:170143463:170149271:-"
## [117] "1:170143463:170149271:-" "15:73343359:73365041:-"
## [119] "15:73343359:73394648:-" "15:73343359:73394648:-"
## [121] "15:73343359:73394648:-" "15:73343359:73394648:-"
## [123] "15:73343359:73394648:-" "15:73343359:73394648:-"
## [125] "15:73343359:73394648:-" "17:34950475:34951885:-"
## [127] "17:34950475:34951885:-" "17:34950475:34951885:-"
## [129] "17:34950475:34951885:-" "17:29038624:29040775:+"
## [131] "17:29038624:29040775:+" "17:29038624:29040775:+"
## [133] "3:60501423:60595594:+" "3:60501423:60595594:+"
## [135] "3:60501423:60595594:+" "3:60501423:60595594:+"
## [137] "3:142625622:142629969:+" "3:142625622:142629969:+"
## [139] "3:142625622:142660448:+" "3:142625622:142660448:+"
## [141] "4:139435184:139436122:+" "4:139435184:139436122:+"
## [143] "4:139435184:139436122:+" "4:139435184:139436122:+"
## [145] "4:9635969:9639198:-" "4:9635969:9639198:-"
## [147] "4:9635969:9639198:-" "4:9635969:9639198:-"
## [149] "4:9635969:9639198:-" "4:9635969:9639198:-"
## [151] "4:9635969:9639198:-" "4:9635969:9639198:-"
## [153] "4:9635969:9639198:-" "4:9635969:9639198:-"
## [155] "4:9635969:9639198:-" "4:9635969:9639198:-"
## [157] "4:9635969:9639198:-" "4:9635969:9639198:-"
## [159] "4:9635969:9639198:-" "4:9635969:9639198:-"
## [161] "4:9635969:9639198:-" "4:140568520:140570257:-"
## [163] "4:140568520:140570257:-" "4:140568520:140570257:-"
## [165] "4:140568520:140570257:-" "4:140568520:140570257:-"
## [167] "4:140568520:140570257:-" "4:140568520:140570257:-"
## [169] "5:105031319:105041853:-" "5:105031319:105041853:-"
## [171] "5:105031319:105041853:-" "5:105031319:105050864:-"
## [173] "5:105031319:105050864:-" "5:105031319:105050864:-"
## [175] "5:105041980:105050864:-" "5:105041980:105050864:-"
## [177] "5:105041980:105050864:-" "5:105103896:105105664:-"
## [179] "5:105103896:105105664:-" "5:105103896:105105664:-"
## [181] "5:105137045:105139480:-" "5:105137045:105139480:-"
## [183] "5:105137045:105139480:-" "5:105137045:105139480:-"
## [185] "5:105274380:105274709:-" "5:105274380:105274709:-"
## [187] "5:105274380:105274709:-" "5:105274380:105274709:-"
## [189] "7:19081301:19082276:+" "7:19081301:19082276:+"
## [191] "7:19081301:19082276:+" "7:90191753:90195655:+"
## [193] "7:90191753:90195655:+" "7:90191753:90195655:+"
## [195] "7:90191753:90195655:+" "7:90191753:90195655:+"
## [197] "7:90191753:90195655:+" "7:90191753:90195655:+"
## [199] "7:90191753:90195655:+" "7:90191753:90195655:+"
## [201] "12:73901498:73926557:+" "17:36042112:36120860:-"
## [203] "17:36042112:36120860:-" "17:36042112:36120860:-"
## [205] "17:36042112:36120860:-" "17:36042112:36120860:-"
## [207] "17:36042112:36120860:-" "17:36042112:36120860:-"
## [209] "17:36042112:36120860:-" "17:36042112:36120860:-"
## [211] "17:36042112:36120860:-" "17:36042112:36120860:-"
## [213] "17:36042112:36120860:-" "17:36042112:36120860:-"
## [215] "17:36042112:36120860:-" "17:36042112:36120860:-"
## [217] "17:36042112:36120860:-" "17:36042112:36120860:-"
## [219] "17:36042112:36120860:-" "17:36042112:36120860:-"
## [221] "17:36042112:36120860:-" "5:107799260:107812358:-"
## [223] "5:107799260:107812358:-" "6:117906839:117917470:+"
## [225] "6:117906839:117917470:+" "6:117906839:117917470:+"
## [227] "6:117907208:117917470:+" "6:117907208:117917470:+"
## [229] "1:153143908:153145639:-" "1:153143908:153145639:-"
## [231] "10:51480941:51481667:+" "10:51480941:51481667:+"
## [233] "10:51480941:51481667:+" "10:51480941:51481667:+"
## [235] "10:51480941:51481667:+" "10:51480941:51481667:+"
## [237] "10:51480941:51481667:+" "10:51480941:51481667:+"
## [239] "10:51480941:51481667:+" "10:51480941:51481667:+"
## [241] "10:51480941:51481667:+" "15:76061223:76062670:-"
## [243] "15:76061223:76062670:-" "15:76061223:76062670:-"
## [245] "15:76061223:76062670:-" "17:35266726:35266992:+"
## [247] "2:156079464:156111786:-" "2:156079464:156111786:-"
## [249] "2:156079464:156111786:-" "2:156079464:156111786:-"
## [251] "2:156079464:156111786:-" "2:156079464:156111786:-"
## [253] "2:156079464:156111786:-" "2:156079464:156111786:-"
## [255] "5:105094559:105103769:-" "5:105094559:105103769:-"
## [257] "5:105094559:105103769:-" "7:24918000:24919631:+"
## [259] "7:24918000:24919631:+" "7:24918000:24919631:+"
## [261] "7:24918000:24919631:+" "7:24918000:24919631:+"
## [263] "7:24918000:24919631:+" "7:24918000:24919631:+"
## [265] "10:117361360:117362014:-" "10:117361360:117362014:-"
## [267] "10:117361360:117362014:-" "10:117361360:117362014:-"
## [269] "5:105137027:105293626:-" "5:105137027:105293626:-"
## [271] "5:105137027:105293626:-" "5:105137027:105293626:-"
## [273] "5:105137027:105293626:-" "5:105137027:105293626:-"
## [275] "5:105137027:105293626:-" "5:105137027:105293626:-"
## [277] "5:105137027:105293626:-" "5:105137027:105293626:-"
## [279] "5:105137027:105293626:-" "5:105137027:105293626:-"
## [281] "5:105137027:105293626:-" "5:105137027:105293626:-"
## [283] "5:105137027:105293626:-" "5:105137027:105293626:-"
## [285] "5:105137027:105293626:-" "5:105137027:105293626:-"
## [287] "5:105137027:105293626:-" "5:105137027:105293626:-"
## [289] "5:105137027:105293626:-" "5:105137027:105293626:-"
## [291] "5:105137027:105293626:-" "5:105137027:105293626:-"
## [293] "X:135742978:135744499:+" "X:135742978:135744499:+"
## [295] "X:135742978:135744499:+" "X:135742978:135744499:+"
## [297] "X:135742978:135744499:+" "X:135742978:135744499:+"
annotated_outlier_junctions<-splicemutr_dat$juncs %in% introns_by_tx$juncs
splicemutr_dat$verdict<-"non_ann"
splicemutr_dat$verdict[annotated_outlier_junctions]<-"ann"
table(splicemutr_dat$verdict)
##
## ann non_ann
## 218 80
splicemutr_dat_filt <- splicemutr_dat %>% dplyr::filter(!(verdict == "ann" & modified == "changed"))
all_sample_psi<-read.table("/media/theron/My_Passport/Ali_data/analysis/all_sample_psi_CP_PC.txt",header=T)
all_sample_pvals <- read.table("/media/theron/My_Passport/Ali_data/analysis/all_sample_pval_CP_PC.txt",header=T)
all_sample_counts_norm <- read.table("/media/theron/My_Passport/Ali_data/analysis/all_sample_counts_norm_CP_PC.txt",header=T)
juncs_sig <- read.table("/media/theron/My_Passport/Ali_data/analysis/juncs_sig_CP_PC.txt",header=T)
juncs_sig<-sprintf("%s:%s:%s:%s",juncs_sig$chrom,juncs_sig$start,juncs_sig$end,juncs_sig$strand)
rownames(all_sample_psi)<-juncs_sig
proteins<-unique(out_splice$ID)
protein_imm_dat<-vapply(proteins,function(prot){
out_splice_small <- out_splice %>% dplyr::filter(ID==prot)
sum(out_splice_small$N_binders)
},numeric(1))
prot_imm_dat<-data.frame(proteins,protein_imm_dat)
juncs<-splicemutr_dat$juncs[as.numeric(str_replace_all(proteins,"protein",""))]
prot_imm_dat$juncs<-juncs
C_psi <- all_sample_psi[prot_imm_dat$juncs,c(1,2,3,4,5,6)]
prot_imm_dat$C_psi_av<-vapply(seq(nrow(C_psi)),function(row_val){
mean(as.numeric(C_psi[row_val,]),na.rm = T)
},numeric(1))
prot_imm_dat$C_psi_med<-vapply(seq(nrow(C_psi)),function(row_val){
median(as.numeric(C_psi[row_val,]),na.rm = T)
},numeric(1))
P_psi <- all_sample_psi[prot_imm_dat$juncs,c(7,8,9,10,11,12)]
prot_imm_dat$P_psi_av<-vapply(seq(nrow(P_psi)),function(row_val){
median(as.numeric(P_psi[row_val,]),na.rm = T)
},numeric(1))
prot_imm_dat$P_psi_med<-vapply(seq(nrow(P_psi)),function(row_val){
median(as.numeric(P_psi[row_val,]),na.rm = T)
},numeric(1))
prot_imm_dat$P_psi_av[is.nan(prot_imm_dat$P_psi_av)]<-0
prot_imm_dat$C_psi_av[is.nan(prot_imm_dat$C_psi_av)]<-0
prot_imm_dat$psi_order <- vapply(seq(nrow(prot_imm_dat)),function(row_val){
vals<-prot_imm_dat[row_val,c("P_psi_av","C_psi_av")]
if (vals$P_psi_av < vals$C_psi_av){
return("pd1<combo")
} else if (vals$P_psi_av > vals$C_psi_av){
return("combo<pd1")
} else {
return("something's equal")
}
},character(1))
rownames(all_sample_counts_norm)<-all_sample_counts_norm$juncs
C_norm_counts <- all_sample_counts_norm[prot_imm_dat$juncs,c(1,2,3,4,5,6)]
prot_imm_dat$C_norm_counts<-vapply(seq(nrow(C_norm_counts)),function(row_val){
mean(as.numeric(C_norm_counts[row_val,]),na.rm = T)
},numeric(1))
prot_imm_dat$C_norm_counts_med<-vapply(seq(nrow(C_norm_counts)),function(row_val){
median(as.numeric(C_norm_counts[row_val,]),na.rm = T)
},numeric(1))
P_norm_counts <- all_sample_counts_norm[prot_imm_dat$juncs,c(7,8,9,10,11,12)]
prot_imm_dat$P_norm_counts<-vapply(seq(nrow(P_norm_counts)),function(row_val){
mean(as.numeric(P_norm_counts[row_val,]),na.rm = T)
},numeric(1))
prot_imm_dat$P_norm_counts_med<-vapply(seq(nrow(P_norm_counts)),function(row_val){
median(as.numeric(P_norm_counts[row_val,]),na.rm = T)
},numeric(1))
U_norm_counts <- all_sample_counts_norm[prot_imm_dat$juncs,c(13,14,15)]
prot_imm_dat$U_norm_counts<-vapply(seq(nrow(U_norm_counts)),function(row_val){
mean(as.numeric(U_norm_counts[row_val,]),na.rm = T)
},numeric(1))
prot_imm_dat$U_norm_counts_med<-vapply(seq(nrow(U_norm_counts)),function(row_val){
median(as.numeric(U_norm_counts[row_val,]),na.rm = T)
},numeric(1))
prot_imm_dat$P_norm_counts[is.nan(prot_imm_dat$P_norm_counts)]<-0
prot_imm_dat$P_norm_counts_med[is.nan(prot_imm_dat$P_norm_counts)]<-0
prot_imm_dat$C_norm_counts[is.nan(prot_imm_dat$C_norm_counts)]<-0
prot_imm_dat$C_norm_counts_med[is.nan(prot_imm_dat$C_norm_counts)]<-0
prot_imm_dat$U_norm_counts[is.nan(prot_imm_dat$U_norm_counts)]<-0
prot_imm_dat$U_norm_counts_med[is.nan(prot_imm_dat$U_norm_counts)]<-0
prot_imm_dat$counts_order <- vapply(seq(nrow(prot_imm_dat)),function(row_val){
vals<-prot_imm_dat[row_val,c("P_norm_counts","C_norm_counts","U_norm_counts")]
if (vals$C_norm_counts < vals$P_norm_counts & vals$C_norm_counts < vals$U_norm_counts){
return("combo<pd1<untreated")
} else if (vals$C_norm_counts < vals$U_norm_counts & vals$U_norm_counts < vals$P_norm_counts){
return("combo<untreated<pd1")
} else if (vals$P_norm_counts < vals$C_norm_counts & vals$C_norm_counts < vals$P_norm_counts){
return("pd1<combo<untreated")
} else if (vals$P_norm_counts < vals$U_norm_counts & vals$U_norm_counts < vals$C_norm_counts){
return("pd1<untreated<combo")
} else if (vals$U_norm_counts < vals$C_norm_counts & vals$C_norm_counts < vals$P_norm_counts){
return("untreated<combo<pd1")
} else if (vals$U_norm_counts < vals$P_norm_counts & vals$P_norm_counts < vals$C_norm_counts){
return("untreated<pd1<combo")
} else {
return("something's equal")
}
},character(1))
prot_imm_dat$counts_order_med <- vapply(seq(nrow(prot_imm_dat)),function(row_val){
vals<-prot_imm_dat[row_val,c("P_norm_counts_med","C_norm_counts_med","U_norm_counts_med")]
if (vals$C_norm_counts_med < vals$P_norm_counts_med & vals$C_norm_counts_med < vals$U_norm_counts_med){
return("combo<pd1<untreated")
} else if (vals$C_norm_counts_med < vals$U_norm_counts_med & vals$U_norm_counts_med < vals$P_norm_counts_med){
return("combo<untreated<pd1")
} else if (vals$P_norm_counts_med < vals$C_norm_counts_med & vals$C_norm_counts_med < vals$P_norm_counts_med){
return("pd1<combo<untreated")
} else if (vals$P_norm_counts_med < vals$U_norm_counts_med & vals$U_norm_counts_med < vals$C_norm_counts_med){
return("pd1<untreated<combo")
} else if (vals$U_norm_counts_med < vals$C_norm_counts_med & vals$C_norm_counts_med < vals$P_norm_counts_med){
return("untreated<combo<pd1")
} else if (vals$U_norm_counts_med < vals$P_norm_counts_med & vals$P_norm_counts_med < vals$C_norm_counts_med){
return("untreated<pd1<combo")
} else {
return("something's equal")
}
},character(1))
prot_imm_dat_linear_psi <- data.frame(rep(prot_imm_dat$proteins,2),rep(prot_imm_dat$protein_imm_dat,2),
c(prot_imm_dat$C_psi_av,prot_imm_dat$P_psi_av),
c(rep("C_psi_av",nrow(prot_imm_dat)),rep("P_psi_av",nrow(prot_imm_dat))))
colnames(prot_imm_dat_linear_psi)<- c("protein","Imm_count","Mean_psi","Treatment")
ggplot(prot_imm_dat_linear_psi,aes(y=Mean_psi,x=protein))+geom_point(aes(color=Treatment))+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
my_comparisons <- list( c("C_psi_av", "P_psi_av"))
ggplot(prot_imm_dat_linear_psi,aes(y=Mean_psi,x=Treatment))+geom_violin(aes(fill=Treatment))+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
stat_compare_means(comparisons = my_comparisons)
ggplot(prot_imm_dat_linear_psi,aes(y=Mean_psi,x=Treatment,group=protein,color=protein))+
geom_point()+geom_line()+theme(legend.position = "none")
ggplot(prot_imm_dat_linear_psi,aes(y=Mean_psi,x=log2(Imm_count+1)))+geom_point(aes(color=Treatment))+
geom_smooth(method = "lm")+
stat_cor(method = "spearman")
prot_imm_dat_linear_counts <- data.frame(rep(prot_imm_dat$proteins,3),rep(prot_imm_dat$protein_imm_dat,3),
c(prot_imm_dat$C_norm_counts,prot_imm_dat$P_norm_counts,prot_imm_dat$U_norm_counts),
c(prot_imm_dat$C_norm_counts_med,prot_imm_dat$P_norm_counts_med,prot_imm_dat$U_norm_counts_med),
c(rep("C_norm_counts",nrow(prot_imm_dat)),
rep("P_norm_counts",nrow(prot_imm_dat)),
rep("U_norm_counts",nrow(prot_imm_dat))))
colnames(prot_imm_dat_linear_counts)<- c("protein","Imm_count","Mean_counts","Median_counts","Treatment")
ggplot(prot_imm_dat_linear_counts,aes(y=Mean_counts,x=protein))+geom_point(aes(color=Treatment))+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
my_comparisons <- list( c("C_norm_counts", "P_norm_counts"),c("P_norm_counts", "U_norm_counts"),c("C_norm_counts", "U_norm_counts"))
ggplot(prot_imm_dat_linear_counts,aes(y=Mean_counts,x=Treatment))+geom_violin(aes(fill=Treatment))+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
stat_compare_means(comparisons = my_comparisons)
ggplot(prot_imm_dat_linear_counts,aes(y=Mean_counts,x=Treatment,group=protein,color=protein))+
geom_point()+geom_line()+theme(legend.position = "none")
ggplot(prot_imm_dat_linear_counts,aes(y=Mean_counts,x=log2(Imm_count+1)))+geom_point(aes(color=Treatment))+
geom_smooth(method = "lm")+
stat_cor(method = "spearman")
ggplot(prot_imm_dat,aes(x=counts_order,fill=counts_order))+
geom_bar(aes(y=(..count..)/sum(..count..)))+
theme(axis.text.x=element_blank())
ggplot(prot_imm_dat,aes(x=counts_order,fill=counts_order))+
geom_bar()+
theme(axis.text.x=element_blank())
ggplot(prot_imm_dat_linear_counts,aes(y=Median_counts,x=protein))+geom_point(aes(color=Treatment))+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
my_comparisons <- list( c("C_norm_counts", "P_norm_counts"),c("P_norm_counts", "U_norm_counts"),c("C_norm_counts", "U_norm_counts"))
ggplot(prot_imm_dat_linear_counts,aes(y=Median_counts,x=Treatment))+geom_violin(aes(fill=Treatment))+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
stat_compare_means(comparisons = my_comparisons)
ggplot(prot_imm_dat_linear_counts,aes(y=Median_counts,x=Treatment,group=protein,color=protein))+
geom_point()+geom_line()+theme(legend.position = "none")
ggplot(prot_imm_dat_linear_counts,aes(y=Median_counts,x=log2(Imm_count+1)))+geom_point(aes(color=Treatment))+
geom_smooth(method = "lm")+
stat_cor(method = "spearman")
ggplot(prot_imm_dat,aes(x=counts_order_med,fill=counts_order_med))+
geom_bar(aes(y=(..count..)/sum(..count..)))+
theme(axis.text.x=element_blank())
ggplot(prot_imm_dat,aes(x=counts_order_med,fill=counts_order_med))+
geom_bar()+
theme(axis.text.x=element_blank())